1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that RISCV uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 
42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
43                                          const RISCVSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   MVT XLenVT = Subtarget.getXLenVT();
47 
48   // Set up the register classes.
49   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
50 
51   if (Subtarget.hasStdExtF())
52     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
53   if (Subtarget.hasStdExtD())
54     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
55 
56   // Compute derived properties from the register classes.
57   computeRegisterProperties(STI.getRegisterInfo());
58 
59   setStackPointerRegisterToSaveRestore(RISCV::X2);
60 
61   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
62     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
63 
64   // TODO: add all necessary setOperationAction calls.
65   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
66 
67   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
68   setOperationAction(ISD::BR_CC, XLenVT, Expand);
69   setOperationAction(ISD::SELECT, XLenVT, Custom);
70   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
71 
72   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
73   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
74 
75   setOperationAction(ISD::VASTART, MVT::Other, Custom);
76   setOperationAction(ISD::VAARG, MVT::Other, Expand);
77   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
78   setOperationAction(ISD::VAEND, MVT::Other, Expand);
79 
80   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
81     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
82 
83   if (!Subtarget.hasStdExtM()) {
84     setOperationAction(ISD::MUL, XLenVT, Expand);
85     setOperationAction(ISD::MULHS, XLenVT, Expand);
86     setOperationAction(ISD::MULHU, XLenVT, Expand);
87     setOperationAction(ISD::SDIV, XLenVT, Expand);
88     setOperationAction(ISD::UDIV, XLenVT, Expand);
89     setOperationAction(ISD::SREM, XLenVT, Expand);
90     setOperationAction(ISD::UREM, XLenVT, Expand);
91   }
92 
93   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
94   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
95   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
96   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
97 
98   setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
99   setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
100   setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
101 
102   setOperationAction(ISD::ROTL, XLenVT, Expand);
103   setOperationAction(ISD::ROTR, XLenVT, Expand);
104   setOperationAction(ISD::BSWAP, XLenVT, Expand);
105   setOperationAction(ISD::CTTZ, XLenVT, Expand);
106   setOperationAction(ISD::CTLZ, XLenVT, Expand);
107   setOperationAction(ISD::CTPOP, XLenVT, Expand);
108 
109   ISD::CondCode FPCCToExtend[] = {
110       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
111       ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
112       ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
113 
114   // TODO: add proper support for the various FMA variants
115   // (FMADD.S, FMSUB.S, FNMSUB.S, FNMADD.S).
116   ISD::NodeType FPOpToExtend[] = {
117       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA};
118 
119   if (Subtarget.hasStdExtF()) {
120     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
121     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
122     for (auto CC : FPCCToExtend)
123       setCondCodeAction(CC, MVT::f32, Expand);
124     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
125     setOperationAction(ISD::SELECT, MVT::f32, Custom);
126     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
127     for (auto Op : FPOpToExtend)
128       setOperationAction(Op, MVT::f32, Expand);
129   }
130 
131   if (Subtarget.hasStdExtD()) {
132     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
133     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
134     for (auto CC : FPCCToExtend)
135       setCondCodeAction(CC, MVT::f64, Expand);
136     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
137     setOperationAction(ISD::SELECT, MVT::f64, Custom);
138     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
139     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
140     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
141     for (auto Op : FPOpToExtend)
142       setOperationAction(Op, MVT::f64, Expand);
143   }
144 
145   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
146   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
147   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
148 
149   if (Subtarget.hasStdExtA()) {
150     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
151     setMinCmpXchgSizeInBits(32);
152   } else {
153     setMaxAtomicSizeInBitsSupported(0);
154   }
155 
156   setBooleanContents(ZeroOrOneBooleanContent);
157 
158   // Function alignments (log2).
159   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
160   setMinFunctionAlignment(FunctionAlignment);
161   setPrefFunctionAlignment(FunctionAlignment);
162 
163   // Effectively disable jump table generation.
164   setMinimumJumpTableEntries(INT_MAX);
165 }
166 
167 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
168                                             EVT VT) const {
169   if (!VT.isVector())
170     return getPointerTy(DL);
171   return VT.changeVectorElementTypeToInteger();
172 }
173 
174 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
175                                              const CallInst &I,
176                                              MachineFunction &MF,
177                                              unsigned Intrinsic) const {
178   switch (Intrinsic) {
179   default:
180     return false;
181   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
182   case Intrinsic::riscv_masked_atomicrmw_add_i32:
183   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
184   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
185   case Intrinsic::riscv_masked_atomicrmw_max_i32:
186   case Intrinsic::riscv_masked_atomicrmw_min_i32:
187   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
190     Info.opc = ISD::INTRINSIC_W_CHAIN;
191     Info.memVT = MVT::getVT(PtrTy->getElementType());
192     Info.ptrVal = I.getArgOperand(0);
193     Info.offset = 0;
194     Info.align = 4;
195     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
196                  MachineMemOperand::MOVolatile;
    return true;
  }
  }
199 }
200 
201 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
202                                                 const AddrMode &AM, Type *Ty,
203                                                 unsigned AS,
204                                                 Instruction *I) const {
205   // No global is ever allowed as a base.
206   if (AM.BaseGV)
207     return false;
208 
209   // Require a 12-bit signed offset.
210   if (!isInt<12>(AM.BaseOffs))
211     return false;
212 
213   switch (AM.Scale) {
214   case 0: // "r+i" or just "i", depending on HasBaseReg.
215     break;
216   case 1:
217     if (!AM.HasBaseReg) // allow "r+i".
218       break;
219     return false; // disallow "r+r" or "r+r+i".
220   default:
221     return false;
222   }
223 
224   return true;
225 }
226 
227 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
228   return isInt<12>(Imm);
229 }
230 
231 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
232   return isInt<12>(Imm);
233 }
234 
235 // On RV32, 64-bit integers are split into their high and low parts and held
236 // in two different registers, so the trunc is free since the low register can
237 // just be used.
238 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
239   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
240     return false;
241   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
242   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
243   return (SrcBits == 64 && DestBits == 32);
244 }
245 
246 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
247   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
248       !SrcVT.isInteger() || !DstVT.isInteger())
249     return false;
250   unsigned SrcBits = SrcVT.getSizeInBits();
251   unsigned DestBits = DstVT.getSizeInBits();
252   return (SrcBits == 64 && DestBits == 32);
253 }
254 
255 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
256   // Zexts are free if they can be combined with a load.
257   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
258     EVT MemVT = LD->getMemoryVT();
259     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
260          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
261         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
262          LD->getExtensionType() == ISD::ZEXTLOAD))
263       return true;
264   }
265 
266   return TargetLowering::isZExtFree(Val, VT2);
267 }
268 
269 // Changes the condition code and swaps operands if necessary, so the SetCC
270 // operation matches one of the comparisons supported directly in the RISC-V
271 // ISA.
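// For example, (setcc a, b, setugt) is rewritten as (setcc b, a, setult),
// which maps directly onto BLTU.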
272 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
273   switch (CC) {
274   default:
275     break;
276   case ISD::SETGT:
277   case ISD::SETLE:
278   case ISD::SETUGT:
279   case ISD::SETULE:
280     CC = ISD::getSetCCSwappedOperands(CC);
281     std::swap(LHS, RHS);
282     break;
283   }
284 }
285 
286 // Return the RISC-V branch opcode that matches the given DAG integer
287 // condition code. The CondCode must be one of those supported by the RISC-V
288 // ISA (see normaliseSetCC).
289 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
290   switch (CC) {
291   default:
292     llvm_unreachable("Unsupported CondCode");
293   case ISD::SETEQ:
294     return RISCV::BEQ;
295   case ISD::SETNE:
296     return RISCV::BNE;
297   case ISD::SETLT:
298     return RISCV::BLT;
299   case ISD::SETGE:
300     return RISCV::BGE;
301   case ISD::SETULT:
302     return RISCV::BLTU;
303   case ISD::SETUGE:
304     return RISCV::BGEU;
305   }
306 }
307 
308 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
309                                             SelectionDAG &DAG) const {
310   switch (Op.getOpcode()) {
311   default:
312     report_fatal_error("unimplemented operand");
313   case ISD::GlobalAddress:
314     return lowerGlobalAddress(Op, DAG);
315   case ISD::BlockAddress:
316     return lowerBlockAddress(Op, DAG);
317   case ISD::ConstantPool:
318     return lowerConstantPool(Op, DAG);
319   case ISD::SELECT:
320     return lowerSELECT(Op, DAG);
321   case ISD::VASTART:
322     return lowerVASTART(Op, DAG);
323   case ISD::FRAMEADDR:
324     return lowerFRAMEADDR(Op, DAG);
325   case ISD::RETURNADDR:
326     return lowerRETURNADDR(Op, DAG);
327   }
328 }
329 
330 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
331                                                 SelectionDAG &DAG) const {
332   SDLoc DL(Op);
333   EVT Ty = Op.getValueType();
334   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
335   const GlobalValue *GV = N->getGlobal();
336   int64_t Offset = N->getOffset();
337   MVT XLenVT = Subtarget.getXLenVT();
338 
339   if (isPositionIndependent())
340     report_fatal_error("Unable to lowerGlobalAddress");
341   // In order to maximise the opportunity for common subexpression elimination,
342   // emit a separate ADD node for the global address offset instead of folding
343   // it in the global address node. Later peephole optimisations may choose to
344   // fold it back in when profitable.
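  // For example, accesses to @g+4 and @g+8 can then share a single
  // LUI %hi(g) + ADDI %lo(g) materialisation, differing only in the final ADD
  // of the constant offset.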
345   SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
346   SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
347   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
348   SDValue MNLo =
349     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
350   if (Offset != 0)
351     return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
352                        DAG.getConstant(Offset, DL, XLenVT));
353   return MNLo;
354 }
355 
356 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
357                                                SelectionDAG &DAG) const {
358   SDLoc DL(Op);
359   EVT Ty = Op.getValueType();
360   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
361   const BlockAddress *BA = N->getBlockAddress();
362   int64_t Offset = N->getOffset();
363 
364   if (isPositionIndependent())
365     report_fatal_error("Unable to lowerBlockAddress");
366 
367   SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
368   SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
369   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
370   SDValue MNLo =
371     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
372   return MNLo;
373 }
374 
375 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
376                                                SelectionDAG &DAG) const {
377   SDLoc DL(Op);
378   EVT Ty = Op.getValueType();
379   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
380   const Constant *CPA = N->getConstVal();
381   int64_t Offset = N->getOffset();
382   unsigned Alignment = N->getAlignment();
383 
384   if (!isPositionIndependent()) {
385     SDValue CPAHi =
386         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
387     SDValue CPALo =
388         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
389     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
390     SDValue MNLo =
391         SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
392     return MNLo;
393   } else {
394     report_fatal_error("Unable to lowerConstantPool");
395   }
396 }
397 
398 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
399   SDValue CondV = Op.getOperand(0);
400   SDValue TrueV = Op.getOperand(1);
401   SDValue FalseV = Op.getOperand(2);
402   SDLoc DL(Op);
403   MVT XLenVT = Subtarget.getXLenVT();
404 
405   // If the result type is XLenVT and CondV is the output of a SETCC node
406   // which also operated on XLenVT inputs, then merge the SETCC node into the
407   // lowered RISCVISD::SELECT_CC to take advantage of the integer
408   // compare+branch instructions. i.e.:
409   // (select (setcc lhs, rhs, cc), truev, falsev)
410   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
411   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
412       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
413     SDValue LHS = CondV.getOperand(0);
414     SDValue RHS = CondV.getOperand(1);
415     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
416     ISD::CondCode CCVal = CC->get();
417 
418     normaliseSetCC(LHS, RHS, CCVal);
419 
420     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
421     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
422     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
423     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
424   }
425 
426   // Otherwise:
427   // (select condv, truev, falsev)
428   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
429   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
430   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
431 
432   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
433   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
434 
435   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
436 }
437 
438 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
439   MachineFunction &MF = DAG.getMachineFunction();
440   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
441 
442   SDLoc DL(Op);
443   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
444                                  getPointerTy(MF.getDataLayout()));
445 
446   // vastart just stores the address of the VarArgsFrameIndex slot into the
447   // memory location argument.
448   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
449   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
450                       MachinePointerInfo(SV));
451 }
452 
453 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
454                                             SelectionDAG &DAG) const {
455   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
456   MachineFunction &MF = DAG.getMachineFunction();
457   MachineFrameInfo &MFI = MF.getFrameInfo();
458   MFI.setFrameAddressIsTaken(true);
459   unsigned FrameReg = RI.getFrameRegister(MF);
460   int XLenInBytes = Subtarget.getXLen() / 8;
461 
462   EVT VT = Op.getValueType();
463   SDLoc DL(Op);
464   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
465   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
466   while (Depth--) {
467     int Offset = -(XLenInBytes * 2);
468     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
469                               DAG.getIntPtrConstant(Offset, DL));
470     FrameAddr =
471         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
472   }
473   return FrameAddr;
474 }
475 
476 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
477                                              SelectionDAG &DAG) const {
478   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
479   MachineFunction &MF = DAG.getMachineFunction();
480   MachineFrameInfo &MFI = MF.getFrameInfo();
481   MFI.setReturnAddressIsTaken(true);
482   MVT XLenVT = Subtarget.getXLenVT();
483   int XLenInBytes = Subtarget.getXLen() / 8;
484 
485   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
486     return SDValue();
487 
488   EVT VT = Op.getValueType();
489   SDLoc DL(Op);
490   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
491   if (Depth) {
492     int Off = -XLenInBytes;
493     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
494     SDValue Offset = DAG.getConstant(Off, DL, VT);
495     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
496                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
497                        MachinePointerInfo());
498   }
499 
500   // Return the value of the return address register, marking it an implicit
501   // live-in.
502   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
503   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
504 }
505 
506 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
507                                                DAGCombinerInfo &DCI) const {
508   switch (N->getOpcode()) {
509   default:
510     break;
511   case RISCVISD::SplitF64: {
512     // If the input to SplitF64 is just BuildPairF64 then the operation is
513     // redundant. Instead, use BuildPairF64's operands directly.
514     SDValue Op0 = N->getOperand(0);
515     if (Op0->getOpcode() != RISCVISD::BuildPairF64)
516       break;
517     return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
518   }
519   }
520 
521   return SDValue();
522 }
523 
524 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
525                                              MachineBasicBlock *BB) {
526   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
527 
528   MachineFunction &MF = *BB->getParent();
529   DebugLoc DL = MI.getDebugLoc();
530   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
531   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
532   unsigned LoReg = MI.getOperand(0).getReg();
533   unsigned HiReg = MI.getOperand(1).getReg();
534   unsigned SrcReg = MI.getOperand(2).getReg();
535   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
536   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
537 
538   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
539                           RI);
540   MachineMemOperand *MMO =
541       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
542                               MachineMemOperand::MOLoad, 8, 8);
543   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
544       .addFrameIndex(FI)
545       .addImm(0)
546       .addMemOperand(MMO);
547   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
548       .addFrameIndex(FI)
549       .addImm(4)
550       .addMemOperand(MMO);
551   MI.eraseFromParent(); // The pseudo instruction is gone now.
552   return BB;
553 }
554 
555 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
556                                                  MachineBasicBlock *BB) {
557   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
558          "Unexpected instruction");
559 
560   MachineFunction &MF = *BB->getParent();
561   DebugLoc DL = MI.getDebugLoc();
562   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
563   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
564   unsigned DstReg = MI.getOperand(0).getReg();
565   unsigned LoReg = MI.getOperand(1).getReg();
566   unsigned HiReg = MI.getOperand(2).getReg();
567   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
568   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
569 
570   MachineMemOperand *MMO =
571       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
572                               MachineMemOperand::MOStore, 8, 8);
573   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
574       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
575       .addFrameIndex(FI)
576       .addImm(0)
577       .addMemOperand(MMO);
578   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
579       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
580       .addFrameIndex(FI)
581       .addImm(4)
582       .addMemOperand(MMO);
583   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
584   MI.eraseFromParent(); // The pseudo instruction is gone now.
585   return BB;
586 }
587 
588 MachineBasicBlock *
589 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
590                                                  MachineBasicBlock *BB) const {
591   switch (MI.getOpcode()) {
592   default:
593     llvm_unreachable("Unexpected instr type to insert");
594   case RISCV::Select_GPR_Using_CC_GPR:
595   case RISCV::Select_FPR32_Using_CC_GPR:
596   case RISCV::Select_FPR64_Using_CC_GPR:
597     break;
598   case RISCV::BuildPairF64Pseudo:
599     return emitBuildPairF64Pseudo(MI, BB);
600   case RISCV::SplitF64Pseudo:
601     return emitSplitF64Pseudo(MI, BB);
602   }
603 
604   // To "insert" a SELECT instruction, we actually have to insert the triangle
605   // control-flow pattern.  The incoming instruction knows the destination vreg
606   // to set, the condition code register to branch on, the true/false values to
607   // select between, and the condcode to use to select the appropriate branch.
608   //
609   // We produce the following control flow:
610   //     HeadMBB
611   //     |  \
612   //     |  IfFalseMBB
613   //     | /
614   //    TailMBB
615   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
616   const BasicBlock *LLVM_BB = BB->getBasicBlock();
617   DebugLoc DL = MI.getDebugLoc();
618   MachineFunction::iterator I = ++BB->getIterator();
619 
620   MachineBasicBlock *HeadMBB = BB;
621   MachineFunction *F = BB->getParent();
622   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
623   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
624 
625   F->insert(I, IfFalseMBB);
626   F->insert(I, TailMBB);
627   // Move all remaining instructions to TailMBB.
628   TailMBB->splice(TailMBB->begin(), HeadMBB,
629                   std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
630   // Update machine-CFG edges by transferring all successors of the current
631   // block to the new block which will contain the Phi node for the select.
632   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
633   // Set the successors for HeadMBB.
634   HeadMBB->addSuccessor(IfFalseMBB);
635   HeadMBB->addSuccessor(TailMBB);
636 
637   // Insert appropriate branch.
638   unsigned LHS = MI.getOperand(1).getReg();
639   unsigned RHS = MI.getOperand(2).getReg();
640   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
641   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
642 
643   BuildMI(HeadMBB, DL, TII.get(Opcode))
644     .addReg(LHS)
645     .addReg(RHS)
646     .addMBB(TailMBB);
647 
648   // IfFalseMBB just falls through to TailMBB.
649   IfFalseMBB->addSuccessor(TailMBB);
650 
651   // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
652   BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
653           MI.getOperand(0).getReg())
654       .addReg(MI.getOperand(4).getReg())
655       .addMBB(HeadMBB)
656       .addReg(MI.getOperand(5).getReg())
657       .addMBB(IfFalseMBB);
658 
659   MI.eraseFromParent(); // The pseudo instruction is gone now.
660   return TailMBB;
661 }
662 
663 // Calling Convention Implementation.
664 // The expectations for frontend ABI lowering vary from target to target.
665 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
666 // details, but this is a longer term goal. For now, we simply try to keep the
667 // role of the frontend as simple and well-defined as possible. The rules can
668 // be summarised as:
669 // * Never split up large scalar arguments. We handle them here.
670 // * If a hardfloat calling convention is being used, and the struct may be
671 // passed in a pair of registers (fp+fp, int+fp), and both registers are
672 // available, then pass as two separate arguments. If either the GPRs or FPRs
673 // are exhausted, then pass according to the rule below.
674 // * If a struct could never be passed in registers or directly in a stack
675 // slot (as it is larger than 2*XLEN and the floating point rules don't
676 // apply), then pass it using a pointer with the byval attribute.
677 // * If a struct is less than 2*XLEN, then coerce to either a two-element
678 // word-sized array or a 2*XLEN scalar (depending on alignment).
679 // * The frontend can determine whether a struct is returned by reference or
680 // not based on its size and fields. If it will be returned by reference, the
681 // frontend must modify the prototype so a pointer with the sret annotation is
682 // passed as the first argument. This is not necessary for large scalar
683 // returns.
684 // * Struct return values and varargs should be coerced to structs containing
685 // register-size fields in the same situations they would be for fixed
686 // arguments.
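// For example, on RV32 a 16-byte struct (larger than 2*XLEN) would be passed
// by the frontend as a pointer with the byval attribute, whereas an i128
// argument is a large scalar that the frontend must not split; CC_RISCV below
// arranges for it to be passed indirectly.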
687 
688 static const MCPhysReg ArgGPRs[] = {
689   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
690   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
691 };
692 
693 // Pass a 2*XLEN argument that has been split into two XLEN values through
694 // registers or the stack as necessary.
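// For example, a split i64 on RV32 ends up either in a pair of GPRs, in one
// GPR plus a stack slot, or entirely on the stack, depending on how many
// argument registers remain.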
695 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
696                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
697                                 MVT ValVT2, MVT LocVT2,
698                                 ISD::ArgFlagsTy ArgFlags2) {
699   unsigned XLenInBytes = XLen / 8;
700   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
701     // At least one half can be passed via register.
702     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
703                                      VA1.getLocVT(), CCValAssign::Full));
704   } else {
705     // Both halves must be passed on the stack, with proper alignment.
706     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
707     State.addLoc(
708         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
709                             State.AllocateStack(XLenInBytes, StackAlign),
710                             VA1.getLocVT(), CCValAssign::Full));
711     State.addLoc(CCValAssign::getMem(
712         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
713         CCValAssign::Full));
714     return false;
715   }
716 
717   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
718     // The second half can also be passed via register.
719     State.addLoc(
720         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
721   } else {
722     // The second half is passed via the stack, without additional alignment.
723     State.addLoc(CCValAssign::getMem(
724         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
725         CCValAssign::Full));
726   }
727 
728   return false;
729 }
730 
731 // Implements the RISC-V calling convention. Returns true upon failure.
732 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
733                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
734                      CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
735   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
736   assert(XLen == 32 || XLen == 64);
737   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
738   if (ValVT == MVT::f32) {
739     LocVT = MVT::i32;
740     LocInfo = CCValAssign::BCvt;
741   }
742 
  // Any return value split into more than two values can't be returned
744   // directly.
745   if (IsRet && ValNo > 1)
746     return true;
747 
748   // If this is a variadic argument, the RISC-V calling convention requires
749   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
750   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
751   // be used regardless of whether the original argument was split during
752   // legalisation or not. The argument will not be passed by registers if the
753   // original type is larger than 2*XLEN, so the register alignment rule does
754   // not apply.
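  // For example, a variadic double on RV32 (size and alignment of 2*XLEN) must
  // start at an even-numbered register (a0, a2, a4 or a6); if the next free
  // register is a1, it is skipped and the value is passed in a2/a3.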
755   unsigned TwoXLenInBytes = (2 * XLen) / 8;
756   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
757       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
758     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
759     // Skip 'odd' register if necessary.
760     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
761       State.AllocateReg(ArgGPRs);
762   }
763 
764   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
765   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
766       State.getPendingArgFlags();
767 
768   assert(PendingLocs.size() == PendingArgFlags.size() &&
769          "PendingLocs and PendingArgFlags out of sync");
770 
771   // Handle passing f64 on RV32D with a soft float ABI.
772   if (XLen == 32 && ValVT == MVT::f64) {
773     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
774            "Can't lower f64 if it is split");
    // Depending on the available argument GPRs, f64 may be passed in a pair of
776     // GPRs, split between a GPR and the stack, or passed completely on the
777     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
778     // cases.
779     unsigned Reg = State.AllocateReg(ArgGPRs);
780     LocVT = MVT::i32;
781     if (!Reg) {
782       unsigned StackOffset = State.AllocateStack(8, 8);
783       State.addLoc(
784           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
785       return false;
786     }
787     if (!State.AllocateReg(ArgGPRs))
788       State.AllocateStack(4, 4);
789     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
790     return false;
791   }
792 
793   // Split arguments might be passed indirectly, so keep track of the pending
794   // values.
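  // For example, an i128 argument on RV32 is legalised into four i32 parts;
  // since that is more than two, all of its pending locations are later
  // converted to the same register or stack slot, which holds the address of
  // the value.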
795   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
796     LocVT = XLenVT;
797     LocInfo = CCValAssign::Indirect;
798     PendingLocs.push_back(
799         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
800     PendingArgFlags.push_back(ArgFlags);
801     if (!ArgFlags.isSplitEnd()) {
802       return false;
803     }
804   }
805 
806   // If the split argument only had two elements, it should be passed directly
807   // in registers or on the stack.
808   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
809     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
810     // Apply the normal calling convention rules to the first half of the
811     // split argument.
812     CCValAssign VA = PendingLocs[0];
813     ISD::ArgFlagsTy AF = PendingArgFlags[0];
814     PendingLocs.clear();
815     PendingArgFlags.clear();
816     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
817                                ArgFlags);
818   }
819 
820   // Allocate to a register if possible, or else a stack slot.
821   unsigned Reg = State.AllocateReg(ArgGPRs);
822   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
823 
824   // If we reach this point and PendingLocs is non-empty, we must be at the
825   // end of a split argument that must be passed indirectly.
826   if (!PendingLocs.empty()) {
827     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
828     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
829 
830     for (auto &It : PendingLocs) {
831       if (Reg)
832         It.convertToReg(Reg);
833       else
834         It.convertToMem(StackOffset);
835       State.addLoc(It);
836     }
837     PendingLocs.clear();
838     PendingArgFlags.clear();
839     return false;
840   }
841 
842   assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
843 
844   if (Reg) {
845     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
846     return false;
847   }
848 
849   if (ValVT == MVT::f32) {
850     LocVT = MVT::f32;
851     LocInfo = CCValAssign::Full;
852   }
853   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
854   return false;
855 }
856 
857 void RISCVTargetLowering::analyzeInputArgs(
858     MachineFunction &MF, CCState &CCInfo,
859     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
860   unsigned NumArgs = Ins.size();
861   FunctionType *FType = MF.getFunction().getFunctionType();
862 
863   for (unsigned i = 0; i != NumArgs; ++i) {
864     MVT ArgVT = Ins[i].VT;
865     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
866 
867     Type *ArgTy = nullptr;
868     if (IsRet)
869       ArgTy = FType->getReturnType();
870     else if (Ins[i].isOrigArg())
871       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
872 
873     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
875       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
876                         << EVT(ArgVT).getEVTString() << '\n');
877       llvm_unreachable(nullptr);
878     }
879   }
880 }
881 
882 void RISCVTargetLowering::analyzeOutputArgs(
883     MachineFunction &MF, CCState &CCInfo,
884     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
885     CallLoweringInfo *CLI) const {
886   unsigned NumArgs = Outs.size();
887 
888   for (unsigned i = 0; i != NumArgs; i++) {
889     MVT ArgVT = Outs[i].VT;
890     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
891     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
892 
893     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
894                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
895       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
896                         << EVT(ArgVT).getEVTString() << "\n");
897       llvm_unreachable(nullptr);
898     }
899   }
900 }
901 
902 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
903 // values.
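// For example, an f32 value passed in a GPR arrives with LocInfo BCvt as an
// i32 and is bitcast back to f32 here.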
904 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
905                                    const CCValAssign &VA, const SDLoc &DL) {
906   switch (VA.getLocInfo()) {
907   default:
908     llvm_unreachable("Unexpected CCValAssign::LocInfo");
909   case CCValAssign::Full:
910     break;
911   case CCValAssign::BCvt:
912     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
913     break;
914   }
915   return Val;
916 }
917 
918 // The caller is responsible for loading the full value if the argument is
919 // passed with CCValAssign::Indirect.
920 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
921                                 const CCValAssign &VA, const SDLoc &DL) {
922   MachineFunction &MF = DAG.getMachineFunction();
923   MachineRegisterInfo &RegInfo = MF.getRegInfo();
924   EVT LocVT = VA.getLocVT();
925   SDValue Val;
926 
927   unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
928   RegInfo.addLiveIn(VA.getLocReg(), VReg);
929   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
930 
931   if (VA.getLocInfo() == CCValAssign::Indirect)
932     return Val;
933 
934   return convertLocVTToValVT(DAG, Val, VA, DL);
935 }
936 
937 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
938                                    const CCValAssign &VA, const SDLoc &DL) {
939   EVT LocVT = VA.getLocVT();
940 
941   switch (VA.getLocInfo()) {
942   default:
943     llvm_unreachable("Unexpected CCValAssign::LocInfo");
944   case CCValAssign::Full:
945     break;
946   case CCValAssign::BCvt:
947     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
948     break;
949   }
950   return Val;
951 }
952 
953 // The caller is responsible for loading the full value if the argument is
954 // passed with CCValAssign::Indirect.
955 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
956                                 const CCValAssign &VA, const SDLoc &DL) {
957   MachineFunction &MF = DAG.getMachineFunction();
958   MachineFrameInfo &MFI = MF.getFrameInfo();
959   EVT LocVT = VA.getLocVT();
960   EVT ValVT = VA.getValVT();
961   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
962   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
963                                  VA.getLocMemOffset(), /*Immutable=*/true);
964   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
965   SDValue Val;
966 
967   ISD::LoadExtType ExtType;
968   switch (VA.getLocInfo()) {
969   default:
970     llvm_unreachable("Unexpected CCValAssign::LocInfo");
971   case CCValAssign::Full:
972   case CCValAssign::Indirect:
973     ExtType = ISD::NON_EXTLOAD;
974     break;
975   }
976   Val = DAG.getExtLoad(
977       ExtType, DL, LocVT, Chain, FIN,
978       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
979   return Val;
980 }
981 
982 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
983                                        const CCValAssign &VA, const SDLoc &DL) {
984   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
985          "Unexpected VA");
986   MachineFunction &MF = DAG.getMachineFunction();
987   MachineFrameInfo &MFI = MF.getFrameInfo();
988   MachineRegisterInfo &RegInfo = MF.getRegInfo();
989 
990   if (VA.isMemLoc()) {
991     // f64 is passed on the stack.
992     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
993     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
994     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
995                        MachinePointerInfo::getFixedStack(MF, FI));
996   }
997 
998   assert(VA.isRegLoc() && "Expected register VA assignment");
999 
1000   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1001   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1002   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1003   SDValue Hi;
1004   if (VA.getLocReg() == RISCV::X17) {
1005     // Second half of f64 is passed on the stack.
1006     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1007     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1008     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1009                      MachinePointerInfo::getFixedStack(MF, FI));
1010   } else {
1011     // Second half of f64 is passed in another GPR.
1012     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1013     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1014     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1015   }
1016   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1017 }
1018 
1019 // Transform physical registers into virtual registers.
1020 SDValue RISCVTargetLowering::LowerFormalArguments(
1021     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1022     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1023     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1024 
1025   switch (CallConv) {
1026   default:
1027     report_fatal_error("Unsupported calling convention");
1028   case CallingConv::C:
1029   case CallingConv::Fast:
1030     break;
1031   }
1032 
1033   MachineFunction &MF = DAG.getMachineFunction();
1034 
1035   const Function &Func = MF.getFunction();
1036   if (Func.hasFnAttribute("interrupt")) {
1037     if (!Func.arg_empty())
1038       report_fatal_error(
1039         "Functions with the interrupt attribute cannot have arguments!");
1040 
1041     StringRef Kind =
1042       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1043 
1044     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1045       report_fatal_error(
1046         "Function interrupt attribute argument not supported!");
1047   }
1048 
1049   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1050   MVT XLenVT = Subtarget.getXLenVT();
1051   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
1053   std::vector<SDValue> OutChains;
1054 
1055   // Assign locations to all of the incoming arguments.
1056   SmallVector<CCValAssign, 16> ArgLocs;
1057   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1058   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1059 
1060   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1061     CCValAssign &VA = ArgLocs[i];
1062     SDValue ArgValue;
1063     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1064     // case.
1065     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1066       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1067     else if (VA.isRegLoc())
1068       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1069     else
1070       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1071 
1072     if (VA.getLocInfo() == CCValAssign::Indirect) {
1073       // If the original argument was split and passed by reference (e.g. i128
1074       // on RV32), we need to load all parts of it here (using the same
1075       // address).
1076       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1077                                    MachinePointerInfo()));
1078       unsigned ArgIndex = Ins[i].OrigArgIndex;
1079       assert(Ins[i].PartOffset == 0);
1080       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1081         CCValAssign &PartVA = ArgLocs[i + 1];
1082         unsigned PartOffset = Ins[i + 1].PartOffset;
1083         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1084                                       DAG.getIntPtrConstant(PartOffset, DL));
1085         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1086                                      MachinePointerInfo()));
1087         ++i;
1088       }
1089       continue;
1090     }
1091     InVals.push_back(ArgValue);
1092   }
1093 
1094   if (IsVarArg) {
1095     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1096     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1097     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1098     MachineFrameInfo &MFI = MF.getFrameInfo();
1099     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1100     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1101 
1102     // Offset of the first variable argument from stack pointer, and size of
1103     // the vararg save area. For now, the varargs save area is either zero or
1104     // large enough to hold a0-a7.
1105     int VaArgOffset, VarArgsSaveSize;
1106 
1107     // If all registers are allocated, then all varargs must be passed on the
1108     // stack and we don't need to save any argregs.
1109     if (ArgRegs.size() == Idx) {
1110       VaArgOffset = CCInfo.getNextStackOffset();
1111       VarArgsSaveSize = 0;
1112     } else {
1113       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1114       VaArgOffset = -VarArgsSaveSize;
1115     }
1116 
    // Record the frame index of the first variable argument,
    // which is needed for the lowering of VASTART.
1119     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1120     RVFI->setVarArgsFrameIndex(FI);
1121 
    // If saving an odd number of registers, create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
1125     if (Idx % 2) {
1126       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1127                                  true);
1128       VarArgsSaveSize += XLenInBytes;
1129     }
1130 
1131     // Copy the integer registers that may have been used for passing varargs
1132     // to the vararg save area.
1133     for (unsigned I = Idx; I < ArgRegs.size();
1134          ++I, VaArgOffset += XLenInBytes) {
1135       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1136       RegInfo.addLiveIn(ArgRegs[I], Reg);
1137       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1138       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1139       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1140       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1141                                    MachinePointerInfo::getFixedStack(MF, FI));
1142       cast<StoreSDNode>(Store.getNode())
1143           ->getMemOperand()
1144           ->setValue((Value *)nullptr);
1145       OutChains.push_back(Store);
1146     }
1147     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1148   }
1149 
  // All stores are grouped in one node so that the sizes of Ins and InVals
  // stay matched. This only happens for vararg functions.
1152   if (!OutChains.empty()) {
1153     OutChains.push_back(Chain);
1154     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1155   }
1156 
1157   return Chain;
1158 }
1159 
1160 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1161 /// for tail call optimization.
1162 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1163 bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
1164   CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1165   const SmallVector<CCValAssign, 16> &ArgLocs) const {
1166 
1167   auto &Callee = CLI.Callee;
1168   auto CalleeCC = CLI.CallConv;
1169   auto IsVarArg = CLI.IsVarArg;
1170   auto &Outs = CLI.Outs;
1171   auto &Caller = MF.getFunction();
1172   auto CallerCC = Caller.getCallingConv();
1173 
1174   // Do not tail call opt functions with "disable-tail-calls" attribute.
1175   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1176     return false;
1177 
1178   // Exception-handling functions need a special set of instructions to
1179   // indicate a return to the hardware. Tail-calling another function would
1180   // probably break this.
1181   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1182   // should be expanded as new function attributes are introduced.
1183   if (Caller.hasFnAttribute("interrupt"))
1184     return false;
1185 
1186   // Do not tail call opt functions with varargs.
1187   if (IsVarArg)
1188     return false;
1189 
1190   // Do not tail call opt if the stack is used to pass parameters.
1191   if (CCInfo.getNextStackOffset() != 0)
1192     return false;
1193 
1194   // Do not tail call opt if any parameters need to be passed indirectly.
1195   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1196   // passed indirectly. So the address of the value will be passed in a
1197   // register, or if not available, then the address is put on the stack. In
1198   // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we also need to check whether any of the
  // CCValAssign entries in ArgLocs are CCValAssign::Indirect.
1202   for (auto &VA : ArgLocs)
1203     if (VA.getLocInfo() == CCValAssign::Indirect)
1204       return false;
1205 
1206   // Do not tail call opt if either caller or callee uses struct return
1207   // semantics.
1208   auto IsCallerStructRet = Caller.hasStructRetAttr();
1209   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1210   if (IsCallerStructRet || IsCalleeStructRet)
1211     return false;
1212 
1213   // Externally-defined functions with weak linkage should not be
1214   // tail-called. The behaviour of branch instructions in this situation (as
1215   // used for tail calls) is implementation-defined, so we cannot rely on the
1216   // linker replacing the tail call with a return.
1217   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1218     const GlobalValue *GV = G->getGlobal();
1219     if (GV->hasExternalWeakLinkage())
1220       return false;
1221   }
1222 
1223   // The callee has to preserve all registers the caller needs to preserve.
1224   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1225   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1226   if (CalleeCC != CallerCC) {
1227     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1228     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1229       return false;
1230   }
1231 
1232   // Byval parameters hand the function a pointer directly into the stack area
1233   // we want to reuse during a tail call. Working around this *is* possible
1234   // but less efficient and uglier in LowerCall.
1235   for (auto &Arg : Outs)
1236     if (Arg.Flags.isByVal())
1237       return false;
1238 
1239   return true;
1240 }
1241 
1242 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
1243 // and output parameter nodes.
1244 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1245                                        SmallVectorImpl<SDValue> &InVals) const {
1246   SelectionDAG &DAG = CLI.DAG;
1247   SDLoc &DL = CLI.DL;
1248   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1249   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1250   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1251   SDValue Chain = CLI.Chain;
1252   SDValue Callee = CLI.Callee;
1253   bool &IsTailCall = CLI.IsTailCall;
1254   CallingConv::ID CallConv = CLI.CallConv;
1255   bool IsVarArg = CLI.IsVarArg;
1256   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1257   MVT XLenVT = Subtarget.getXLenVT();
1258 
1259   MachineFunction &MF = DAG.getMachineFunction();
1260 
1261   // Analyze the operands of the call, assigning locations to each operand.
1262   SmallVector<CCValAssign, 16> ArgLocs;
1263   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1264   analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
1265 
1266   // Check if it's really possible to do a tail call.
1267   if (IsTailCall)
1268     IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
1269                                                    ArgLocs);
1270 
1271   if (IsTailCall)
1272     ++NumTailCalls;
1273   else if (CLI.CS && CLI.CS.isMustTailCall())
1274     report_fatal_error("failed to perform tail call elimination on a call "
1275                        "site marked musttail");
1276 
1277   // Get a count of how many bytes are to be pushed on the stack.
1278   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1279 
1280   // Create local copies for byval args
1281   SmallVector<SDValue, 8> ByValArgs;
1282   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1283     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1284     if (!Flags.isByVal())
1285       continue;
1286 
1287     SDValue Arg = OutVals[i];
1288     unsigned Size = Flags.getByValSize();
1289     unsigned Align = Flags.getByValAlign();
1290 
1291     int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
1292     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1293     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
1294 
1295     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
1296                           /*IsVolatile=*/false,
1297                           /*AlwaysInline=*/false,
1298                           IsTailCall, MachinePointerInfo(),
1299                           MachinePointerInfo());
1300     ByValArgs.push_back(FIPtr);
1301   }
1302 
1303   if (!IsTailCall)
1304     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
1305 
1306   // Copy argument values to their designated locations.
1307   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1308   SmallVector<SDValue, 8> MemOpChains;
1309   SDValue StackPtr;
1310   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
1311     CCValAssign &VA = ArgLocs[i];
1312     SDValue ArgValue = OutVals[i];
1313     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1314 
1315     // Handle passing f64 on RV32D with a soft float ABI as a special case.
1316     bool IsF64OnRV32DSoftABI =
1317         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
1318     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
1319       SDValue SplitF64 = DAG.getNode(
1320           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
1321       SDValue Lo = SplitF64.getValue(0);
1322       SDValue Hi = SplitF64.getValue(1);
1323 
1324       unsigned RegLo = VA.getLocReg();
1325       RegsToPass.push_back(std::make_pair(RegLo, Lo));
1326 
1327       if (RegLo == RISCV::X17) {
1328         // Second half of f64 is passed on the stack.
1329         // Work out the address of the stack slot.
1330         if (!StackPtr.getNode())
1331           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1332         // Emit the store.
1333         MemOpChains.push_back(
1334             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
1335       } else {
1336         // Second half of f64 is passed in another GPR.
1337         unsigned RegHigh = RegLo + 1;
1338         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
1339       }
1340       continue;
1341     }
1342 
1343     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
1344     // as any other MemLoc.
1345 
1346     // Promote the value if needed.
1347     // For now, only handle fully promoted and indirect arguments.
1348     if (VA.getLocInfo() == CCValAssign::Indirect) {
1349       // Store the argument in a stack slot and pass its address.
1350       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
1351       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1352       MemOpChains.push_back(
1353           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1354                        MachinePointerInfo::getFixedStack(MF, FI)));
1355       // If the original argument was split (e.g. i128), we need
1356       // to store all parts of it here (and pass just one address).
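      // For example (illustrative only): on RV32 an i128 argument arrives
      // here as four i32 parts with PartOffset 0, 4, 8 and 12; each part is
      // stored at SpillSlot + PartOffset and only the slot address is passed.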
1357       unsigned ArgIndex = Outs[i].OrigArgIndex;
1358       assert(Outs[i].PartOffset == 0);
1359       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
1360         SDValue PartValue = OutVals[i + 1];
1361         unsigned PartOffset = Outs[i + 1].PartOffset;
1362         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1363                                       DAG.getIntPtrConstant(PartOffset, DL));
1364         MemOpChains.push_back(
1365             DAG.getStore(Chain, DL, PartValue, Address,
1366                          MachinePointerInfo::getFixedStack(MF, FI)));
1367         ++i;
1368       }
1369       ArgValue = SpillSlot;
1370     } else {
1371       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
1372     }
1373 
1374     // Use local copy if it is a byval arg.
1375     if (Flags.isByVal())
1376       ArgValue = ByValArgs[j++];
1377 
1378     if (VA.isRegLoc()) {
1379       // Queue up the argument copies and emit them at the end.
1380       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1381     } else {
1382       assert(VA.isMemLoc() && "Argument not register or memory");
1383       assert(!IsTailCall && "Tail call not allowed if stack is used "
1384                             "for passing parameters");
1385 
1386       // Work out the address of the stack slot.
1387       if (!StackPtr.getNode())
1388         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1389       SDValue Address =
1390           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1391                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
1392 
1393       // Emit the store.
1394       MemOpChains.push_back(
1395           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1396     }
1397   }
1398 
1399   // Join the stores, which are independent of one another.
1400   if (!MemOpChains.empty())
1401     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1402 
1403   SDValue Glue;
1404 
1405   // Build a sequence of copy-to-reg nodes, chained and glued together.
1406   for (auto &Reg : RegsToPass) {
1407     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
1408     Glue = Chain.getValue(1);
1409   }
1410 
1411   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
1412   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
1413   // split it, and so the direct call can be matched by PseudoCALL.
1414   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
1415     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
1416   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1417     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
1418   }
1419 
1420   // The first call operand is the chain and the second is the target address.
1421   SmallVector<SDValue, 8> Ops;
1422   Ops.push_back(Chain);
1423   Ops.push_back(Callee);
1424 
1425   // Add argument registers to the end of the list so that they are
1426   // known live into the call.
1427   for (auto &Reg : RegsToPass)
1428     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1429 
1430   if (!IsTailCall) {
1431     // Add a register mask operand representing the call-preserved registers.
1432     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1433     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1434     assert(Mask && "Missing call preserved mask for calling convention");
1435     Ops.push_back(DAG.getRegisterMask(Mask));
1436   }
1437 
1438   // Glue the call to the argument copies, if any.
1439   if (Glue.getNode())
1440     Ops.push_back(Glue);
1441 
1442   // Emit the call.
1443   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1444 
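  // A tail call becomes a RISCVISD::TAIL node and returns immediately: no
  // CALLSEQ_END and no result copies are emitted, since the callee reuses
  // the caller's frame and returns directly to the caller's caller.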
1445   if (IsTailCall) {
1446     MF.getFrameInfo().setHasTailCall();
1447     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
1448   }
1449 
1450   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
1451   Glue = Chain.getValue(1);
1452 
1453   // Mark the end of the call, which is glued to the call itself.
1454   Chain = DAG.getCALLSEQ_END(Chain,
1455                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1456                              DAG.getConstant(0, DL, PtrVT, true),
1457                              Glue, DL);
1458   Glue = Chain.getValue(1);
1459 
1460   // Assign locations to each value returned by this call.
1461   SmallVector<CCValAssign, 16> RVLocs;
1462   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1463   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
1464 
1465   // Copy all of the result registers out of their specified physregs.
1466   for (auto &VA : RVLocs) {
1467     // Copy the value out
1468     SDValue RetValue =
1469         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1470     // Glue the RetValue to the end of the call sequence
1471     Chain = RetValue.getValue(1);
1472     Glue = RetValue.getValue(2);
1473 
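    // An f64 returned under the RV32 soft-float ABI comes back split across
    // a0 and a1; copy the second half out of a1 and reassemble the double
    // with BuildPairF64.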
1474     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1475       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
1476       SDValue RetValue2 =
1477           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
1478       Chain = RetValue2.getValue(1);
1479       Glue = RetValue2.getValue(2);
1480       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
1481                              RetValue2);
1482     }
1483 
1484     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
1485 
1486     InVals.push_back(RetValue);
1487   }
1488 
1489   return Chain;
1490 }
1491 
1492 bool RISCVTargetLowering::CanLowerReturn(
1493     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1494     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
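  // Run the return-value calling convention over Outs; if any value cannot
  // be assigned to a register, report that the return cannot be lowered
  // directly (SelectionDAG will then demote the return value to an sret
  // argument).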
1495   SmallVector<CCValAssign, 16> RVLocs;
1496   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1497   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1498     MVT VT = Outs[i].VT;
1499     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1500     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
1501                  CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
1502       return false;
1503   }
1504   return true;
1505 }
1506 
1507 SDValue
1508 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1509                                  bool IsVarArg,
1510                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
1511                                  const SmallVectorImpl<SDValue> &OutVals,
1512                                  const SDLoc &DL, SelectionDAG &DAG) const {
1513   // Stores the assignment of each return value to its location.
1514   SmallVector<CCValAssign, 16> RVLocs;
1515 
1516   // Info about the registers and stack slot.
1517   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
1518                  *DAG.getContext());
1519 
1520   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
1521                     nullptr);
1522 
1523   SDValue Glue;
1524   SmallVector<SDValue, 4> RetOps(1, Chain);
1525 
1526   // Copy the result values into the output registers.
1527   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
1528     SDValue Val = OutVals[i];
1529     CCValAssign &VA = RVLocs[i];
1530     assert(VA.isRegLoc() && "Can only return in registers!");
1531 
1532     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1533       // Handle returning f64 on RV32D with a soft float ABI.
1534       assert(VA.isRegLoc() && "Expected return via registers");
1535       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
1536                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
1537       SDValue Lo = SplitF64.getValue(0);
1538       SDValue Hi = SplitF64.getValue(1);
1539       unsigned RegLo = VA.getLocReg();
1540       unsigned RegHi = RegLo + 1;
1541       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
1542       Glue = Chain.getValue(1);
1543       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
1544       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
1545       Glue = Chain.getValue(1);
1546       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
1547     } else {
1548       // Handle a 'normal' return.
1549       Val = convertValVTToLocVT(DAG, Val, VA, DL);
1550       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
1551 
1552       // Guarantee that all emitted copies are glued together.
1553       Glue = Chain.getValue(1);
1554       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1555     }
1556   }
1557 
1558   RetOps[0] = Chain; // Update chain.
1559 
1560   // Add the glue node if we have it.
1561   if (Glue.getNode()) {
1562     RetOps.push_back(Glue);
1563   }
1564 
1565   // Interrupt service routines use different return instructions.
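  // The "interrupt" function attribute (e.g. Clang's
  // __attribute__((interrupt("machine")))) selects between uret, sret and
  // mret; any value other than "user" or "supervisor" falls back to mret.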
1566   const Function &Func = DAG.getMachineFunction().getFunction();
1567   if (Func.hasFnAttribute("interrupt")) {
1568     if (!Func.getReturnType()->isVoidTy())
1569       report_fatal_error(
1570           "Functions with the interrupt attribute must have void return type!");
1571 
1572     MachineFunction &MF = DAG.getMachineFunction();
1573     StringRef Kind =
1574       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1575 
1576     unsigned RetOpc;
1577     if (Kind == "user")
1578       RetOpc = RISCVISD::URET_FLAG;
1579     else if (Kind == "supervisor")
1580       RetOpc = RISCVISD::SRET_FLAG;
1581     else
1582       RetOpc = RISCVISD::MRET_FLAG;
1583 
1584     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
1585   }
1586 
1587   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
1588 }
1589 
1590 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1591   switch ((RISCVISD::NodeType)Opcode) {
1592   case RISCVISD::FIRST_NUMBER:
1593     break;
1594   case RISCVISD::RET_FLAG:
1595     return "RISCVISD::RET_FLAG";
1596   case RISCVISD::URET_FLAG:
1597     return "RISCVISD::URET_FLAG";
1598   case RISCVISD::SRET_FLAG:
1599     return "RISCVISD::SRET_FLAG";
1600   case RISCVISD::MRET_FLAG:
1601     return "RISCVISD::MRET_FLAG";
1602   case RISCVISD::CALL:
1603     return "RISCVISD::CALL";
1604   case RISCVISD::SELECT_CC:
1605     return "RISCVISD::SELECT_CC";
1606   case RISCVISD::BuildPairF64:
1607     return "RISCVISD::BuildPairF64";
1608   case RISCVISD::SplitF64:
1609     return "RISCVISD::SplitF64";
1610   case RISCVISD::TAIL:
1611     return "RISCVISD::TAIL";
1612   }
1613   return nullptr;
1614 }
1615 
1616 std::pair<unsigned, const TargetRegisterClass *>
1617 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1618                                                   StringRef Constraint,
1619                                                   MVT VT) const {
1620   // First, see if this is a constraint that directly corresponds to a
1621   // RISCV register class.
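  // Only the 'r' constraint is handled here; illustratively,
  //   asm volatile("add %0, %1, %2" : "=r"(sum) : "r"(a), "r"(b));
  // maps each operand to a register in GPRRegClass.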
1622   if (Constraint.size() == 1) {
1623     switch (Constraint[0]) {
1624     case 'r':
1625       return std::make_pair(0U, &RISCV::GPRRegClass);
1626     default:
1627       break;
1628     }
1629   }
1630 
1631   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1632 }
1633 
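// emitLeadingFence and emitTrailingFence together give a conservative
// fence-based lowering for atomic loads and stores: a full fence before a
// seq_cst load, a release fence before release-or-stronger stores, and an
// acquire fence after acquire-or-stronger loads.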
1634 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
1635                                                    Instruction *Inst,
1636                                                    AtomicOrdering Ord) const {
1637   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
1638     return Builder.CreateFence(Ord);
1639   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
1640     return Builder.CreateFence(AtomicOrdering::Release);
1641   return nullptr;
1642 }
1643 
1644 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
1645                                                     Instruction *Inst,
1646                                                     AtomicOrdering Ord) const {
1647   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
1648     return Builder.CreateFence(AtomicOrdering::Acquire);
1649   return nullptr;
1650 }
1651 
1652 TargetLowering::AtomicExpansionKind
1653 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
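  // 8- and 16-bit atomicrmw operations have no native AMO instruction, so
  // AtomicExpandPass rewrites them into an LR/SC loop on the containing
  // aligned 32-bit word via the riscv_masked_atomicrmw_* intrinsics below.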
1654   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
1655   if (Size == 8 || Size == 16)
1656     return AtomicExpansionKind::MaskedIntrinsic;
1657   return AtomicExpansionKind::None;
1658 }
1659 
1660 static Intrinsic::ID
1661 getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
1662   switch (BinOp) {
1663   default:
1664     llvm_unreachable("Unexpected AtomicRMW BinOp");
1665   case AtomicRMWInst::Xchg:
1666     return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
1667   case AtomicRMWInst::Add:
1668     return Intrinsic::riscv_masked_atomicrmw_add_i32;
1669   case AtomicRMWInst::Sub:
1670     return Intrinsic::riscv_masked_atomicrmw_sub_i32;
1671   case AtomicRMWInst::Nand:
1672     return Intrinsic::riscv_masked_atomicrmw_nand_i32;
1673   case AtomicRMWInst::Max:
1674     return Intrinsic::riscv_masked_atomicrmw_max_i32;
1675   case AtomicRMWInst::Min:
1676     return Intrinsic::riscv_masked_atomicrmw_min_i32;
1677   case AtomicRMWInst::UMax:
1678     return Intrinsic::riscv_masked_atomicrmw_umax_i32;
1679   case AtomicRMWInst::UMin:
1680     return Intrinsic::riscv_masked_atomicrmw_umin_i32;
1681   }
1682 }
1683 
1684 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
1685     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
1686     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
1687   Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
1688   Type *Tys[] = {AlignedAddr->getType()};
1689   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
1690       AI->getModule(),
1691       getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
1692 
1693   // Must pass the shift amount needed to sign extend the loaded value prior
1694   // to performing a signed comparison for min/max. ShiftAmt is the number of
1695   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
1696   // is the number of bits to left+right shift the value in order to
1697   // sign-extend.
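  // Worked example: an i8 atomic min on RV32 whose byte lives at bit offset
  // 16 has ShiftAmt = 16 and ValWidth = 8, so SextShamt = 32 - 8 - 16 = 8;
  // shifting the loaded word left and then arithmetic-right by 8 moves the
  // byte's sign bit to bit 31 and back, sign-extending it in place for the
  // signed comparison.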
1698   if (AI->getOperation() == AtomicRMWInst::Min ||
1699       AI->getOperation() == AtomicRMWInst::Max) {
1700     const DataLayout &DL = AI->getModule()->getDataLayout();
1701     unsigned ValWidth =
1702         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
1703     Value *SextShamt = Builder.CreateSub(
1704         Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
1705     return Builder.CreateCall(LrwOpScwLoop,
1706                               {AlignedAddr, Incr, Mask, SextShamt, Ordering});
1707   }
1708 
1709   return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
1710 }
1711