1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 
42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
43                                          const RISCVSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   if (Subtarget.isRV32E())
47     report_fatal_error("Codegen not yet implemented for RV32E");
48 
49   RISCVABI::ABI ABI = Subtarget.getTargetABI();
50   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
51 
52   if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64)
53     report_fatal_error("Don't know how to lower this ABI");
54 
55   MVT XLenVT = Subtarget.getXLenVT();
56 
57   // Set up the register classes.
58   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
59 
60   if (Subtarget.hasStdExtF())
61     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
62   if (Subtarget.hasStdExtD())
63     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
64 
65   // Compute derived properties from the register classes.
66   computeRegisterProperties(STI.getRegisterInfo());
67 
68   setStackPointerRegisterToSaveRestore(RISCV::X2);
69 
70   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
71     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
72 
73   // TODO: add all necessary setOperationAction calls.
74   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
75 
76   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
77   setOperationAction(ISD::BR_CC, XLenVT, Expand);
78   setOperationAction(ISD::SELECT, XLenVT, Custom);
79   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
80 
81   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
82   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
83 
84   setOperationAction(ISD::VASTART, MVT::Other, Custom);
85   setOperationAction(ISD::VAARG, MVT::Other, Expand);
86   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
87   setOperationAction(ISD::VAEND, MVT::Other, Expand);
88 
89   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
90     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
91 
92   if (Subtarget.is64Bit()) {
93     setOperationAction(ISD::SHL, MVT::i32, Custom);
94     setOperationAction(ISD::SRA, MVT::i32, Custom);
95     setOperationAction(ISD::SRL, MVT::i32, Custom);
96   }
97 
98   if (!Subtarget.hasStdExtM()) {
99     setOperationAction(ISD::MUL, XLenVT, Expand);
100     setOperationAction(ISD::MULHS, XLenVT, Expand);
101     setOperationAction(ISD::MULHU, XLenVT, Expand);
102     setOperationAction(ISD::SDIV, XLenVT, Expand);
103     setOperationAction(ISD::UDIV, XLenVT, Expand);
104     setOperationAction(ISD::SREM, XLenVT, Expand);
105     setOperationAction(ISD::UREM, XLenVT, Expand);
106   }
107 
108   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
109     setOperationAction(ISD::SDIV, MVT::i32, Custom);
110     setOperationAction(ISD::UDIV, MVT::i32, Custom);
111     setOperationAction(ISD::UREM, MVT::i32, Custom);
112   }
113 
114   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
115   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
116   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
117   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
118 
119   setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
120   setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
121   setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
122 
123   setOperationAction(ISD::ROTL, XLenVT, Expand);
124   setOperationAction(ISD::ROTR, XLenVT, Expand);
125   setOperationAction(ISD::BSWAP, XLenVT, Expand);
126   setOperationAction(ISD::CTTZ, XLenVT, Expand);
127   setOperationAction(ISD::CTLZ, XLenVT, Expand);
128   setOperationAction(ISD::CTPOP, XLenVT, Expand);
129 
130   ISD::CondCode FPCCToExtend[] = {
131       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
132       ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
133       ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
134 
135   ISD::NodeType FPOpToExtend[] = {
136       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
137 
138   if (Subtarget.hasStdExtF()) {
139     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
140     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
141     for (auto CC : FPCCToExtend)
142       setCondCodeAction(CC, MVT::f32, Expand);
143     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
144     setOperationAction(ISD::SELECT, MVT::f32, Custom);
145     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
146     for (auto Op : FPOpToExtend)
147       setOperationAction(Op, MVT::f32, Expand);
148   }
149 
150   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
151     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
152 
153   if (Subtarget.hasStdExtD()) {
154     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
155     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
156     for (auto CC : FPCCToExtend)
157       setCondCodeAction(CC, MVT::f64, Expand);
158     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
159     setOperationAction(ISD::SELECT, MVT::f64, Custom);
160     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
161     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
162     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
163     for (auto Op : FPOpToExtend)
164       setOperationAction(Op, MVT::f64, Expand);
165   }
166 
167   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
168   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
169   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
170 
171   if (Subtarget.hasStdExtA()) {
172     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
173     setMinCmpXchgSizeInBits(32);
174   } else {
175     setMaxAtomicSizeInBitsSupported(0);
176   }
177 
178   setBooleanContents(ZeroOrOneBooleanContent);
179 
180   // Function alignments (log2).
181   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
182   setMinFunctionAlignment(FunctionAlignment);
183   setPrefFunctionAlignment(FunctionAlignment);
184 
185   // Effectively disable jump table generation.
186   setMinimumJumpTableEntries(INT_MAX);
187 }
188 
189 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
190                                             EVT VT) const {
191   if (!VT.isVector())
192     return getPointerTy(DL);
193   return VT.changeVectorElementTypeToInteger();
194 }
195 
196 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
197                                              const CallInst &I,
198                                              MachineFunction &MF,
199                                              unsigned Intrinsic) const {
200   switch (Intrinsic) {
201   default:
202     return false;
203   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
204   case Intrinsic::riscv_masked_atomicrmw_add_i32:
205   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
206   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
207   case Intrinsic::riscv_masked_atomicrmw_max_i32:
208   case Intrinsic::riscv_masked_atomicrmw_min_i32:
209   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
210   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
211   case Intrinsic::riscv_masked_cmpxchg_i32:
212     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
213     Info.opc = ISD::INTRINSIC_W_CHAIN;
214     Info.memVT = MVT::getVT(PtrTy->getElementType());
215     Info.ptrVal = I.getArgOperand(0);
216     Info.offset = 0;
217     Info.align = 4;
218     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
219                  MachineMemOperand::MOVolatile;
220     return true;
221   }
222 }
223 
224 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
225                                                 const AddrMode &AM, Type *Ty,
226                                                 unsigned AS,
227                                                 Instruction *I) const {
228   // No global is ever allowed as a base.
229   if (AM.BaseGV)
230     return false;
231 
232   // Require a 12-bit signed offset.
233   if (!isInt<12>(AM.BaseOffs))
234     return false;
235 
236   switch (AM.Scale) {
237   case 0: // "r+i" or just "i", depending on HasBaseReg.
238     break;
239   case 1:
240     if (!AM.HasBaseReg) // allow "r+i".
241       break;
242     return false; // disallow "r+r" or "r+r+i".
243   default:
244     return false;
245   }
246 
247   return true;
248 }
249 
250 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
251   return isInt<12>(Imm);
252 }
253 
254 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
255   return isInt<12>(Imm);
256 }
257 
258 // On RV32, 64-bit integers are split into their high and low parts and held
259 // in two different registers, so the trunc is free since the low register can
260 // just be used.
261 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
262   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
263     return false;
264   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
265   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
266   return (SrcBits == 64 && DestBits == 32);
267 }
268 
269 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
270   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
271       !SrcVT.isInteger() || !DstVT.isInteger())
272     return false;
273   unsigned SrcBits = SrcVT.getSizeInBits();
274   unsigned DestBits = DstVT.getSizeInBits();
275   return (SrcBits == 64 && DestBits == 32);
276 }
277 
278 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
279   // Zexts are free if they can be combined with a load.
280   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
281     EVT MemVT = LD->getMemoryVT();
282     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
283          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
284         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
285          LD->getExtensionType() == ISD::ZEXTLOAD))
286       return true;
287   }
288 
289   return TargetLowering::isZExtFree(Val, VT2);
290 }
291 
292 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
293   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
294 }
295 
296 // Changes the condition code and swaps operands if necessary, so the SetCC
297 // operation matches one of the comparisons supported directly in the RISC-V
298 // ISA.
299 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
300   switch (CC) {
301   default:
302     break;
303   case ISD::SETGT:
304   case ISD::SETLE:
305   case ISD::SETUGT:
306   case ISD::SETULE:
307     CC = ISD::getSetCCSwappedOperands(CC);
308     std::swap(LHS, RHS);
309     break;
310   }
311 }
312 
313 // Return the RISC-V branch opcode that matches the given DAG integer
314 // condition code. The CondCode must be one of those supported by the RISC-V
315 // ISA (see normaliseSetCC).
316 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
317   switch (CC) {
318   default:
319     llvm_unreachable("Unsupported CondCode");
320   case ISD::SETEQ:
321     return RISCV::BEQ;
322   case ISD::SETNE:
323     return RISCV::BNE;
324   case ISD::SETLT:
325     return RISCV::BLT;
326   case ISD::SETGE:
327     return RISCV::BGE;
328   case ISD::SETULT:
329     return RISCV::BLTU;
330   case ISD::SETUGE:
331     return RISCV::BGEU;
332   }
333 }
334 
335 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
336                                             SelectionDAG &DAG) const {
337   switch (Op.getOpcode()) {
338   default:
339     report_fatal_error("unimplemented operand");
340   case ISD::GlobalAddress:
341     return lowerGlobalAddress(Op, DAG);
342   case ISD::BlockAddress:
343     return lowerBlockAddress(Op, DAG);
344   case ISD::ConstantPool:
345     return lowerConstantPool(Op, DAG);
346   case ISD::SELECT:
347     return lowerSELECT(Op, DAG);
348   case ISD::VASTART:
349     return lowerVASTART(Op, DAG);
350   case ISD::FRAMEADDR:
351     return lowerFRAMEADDR(Op, DAG);
352   case ISD::RETURNADDR:
353     return lowerRETURNADDR(Op, DAG);
354   case ISD::BITCAST: {
355     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
356            "Unexpected custom legalisation");
357     SDLoc DL(Op);
358     SDValue Op0 = Op.getOperand(0);
359     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
360       return SDValue();
361     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
362     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
363     return FPConv;
364   }
365   }
366 }
367 
368 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
369                                                 SelectionDAG &DAG) const {
370   SDLoc DL(Op);
371   EVT Ty = Op.getValueType();
372   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
373   const GlobalValue *GV = N->getGlobal();
374   int64_t Offset = N->getOffset();
375   MVT XLenVT = Subtarget.getXLenVT();
376 
377   if (isPositionIndependent())
378     report_fatal_error("Unable to lowerGlobalAddress");
379   // In order to maximise the opportunity for common subexpression elimination,
380   // emit a separate ADD node for the global address offset instead of folding
381   // it in the global address node. Later peephole optimisations may choose to
382   // fold it back in when profitable.
383   SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
384   SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
385   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
386   SDValue MNLo =
387     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
388   if (Offset != 0)
389     return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
390                        DAG.getConstant(Offset, DL, XLenVT));
391   return MNLo;
392 }
393 
394 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
395                                                SelectionDAG &DAG) const {
396   SDLoc DL(Op);
397   EVT Ty = Op.getValueType();
398   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
399   const BlockAddress *BA = N->getBlockAddress();
400   int64_t Offset = N->getOffset();
401 
402   if (isPositionIndependent())
403     report_fatal_error("Unable to lowerBlockAddress");
404 
405   SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
406   SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
407   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
408   SDValue MNLo =
409     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
410   return MNLo;
411 }
412 
413 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
414                                                SelectionDAG &DAG) const {
415   SDLoc DL(Op);
416   EVT Ty = Op.getValueType();
417   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
418   const Constant *CPA = N->getConstVal();
419   int64_t Offset = N->getOffset();
420   unsigned Alignment = N->getAlignment();
421 
422   if (!isPositionIndependent()) {
423     SDValue CPAHi =
424         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
425     SDValue CPALo =
426         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
427     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
428     SDValue MNLo =
429         SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
430     return MNLo;
431   } else {
432     report_fatal_error("Unable to lowerConstantPool");
433   }
434 }
435 
436 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
437   SDValue CondV = Op.getOperand(0);
438   SDValue TrueV = Op.getOperand(1);
439   SDValue FalseV = Op.getOperand(2);
440   SDLoc DL(Op);
441   MVT XLenVT = Subtarget.getXLenVT();
442 
443   // If the result type is XLenVT and CondV is the output of a SETCC node
444   // which also operated on XLenVT inputs, then merge the SETCC node into the
445   // lowered RISCVISD::SELECT_CC to take advantage of the integer
446   // compare+branch instructions. i.e.:
447   // (select (setcc lhs, rhs, cc), truev, falsev)
448   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
449   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
450       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
451     SDValue LHS = CondV.getOperand(0);
452     SDValue RHS = CondV.getOperand(1);
453     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
454     ISD::CondCode CCVal = CC->get();
455 
456     normaliseSetCC(LHS, RHS, CCVal);
457 
458     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
459     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
460     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
461     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
462   }
463 
464   // Otherwise:
465   // (select condv, truev, falsev)
466   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
467   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
468   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
469 
470   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
471   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
472 
473   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
474 }
475 
476 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
477   MachineFunction &MF = DAG.getMachineFunction();
478   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
479 
480   SDLoc DL(Op);
481   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
482                                  getPointerTy(MF.getDataLayout()));
483 
484   // vastart just stores the address of the VarArgsFrameIndex slot into the
485   // memory location argument.
486   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
487   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
488                       MachinePointerInfo(SV));
489 }
490 
491 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
492                                             SelectionDAG &DAG) const {
493   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
494   MachineFunction &MF = DAG.getMachineFunction();
495   MachineFrameInfo &MFI = MF.getFrameInfo();
496   MFI.setFrameAddressIsTaken(true);
497   unsigned FrameReg = RI.getFrameRegister(MF);
498   int XLenInBytes = Subtarget.getXLen() / 8;
499 
500   EVT VT = Op.getValueType();
501   SDLoc DL(Op);
502   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
503   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
504   while (Depth--) {
505     int Offset = -(XLenInBytes * 2);
506     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
507                               DAG.getIntPtrConstant(Offset, DL));
508     FrameAddr =
509         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
510   }
511   return FrameAddr;
512 }
513 
514 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
515                                              SelectionDAG &DAG) const {
516   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
517   MachineFunction &MF = DAG.getMachineFunction();
518   MachineFrameInfo &MFI = MF.getFrameInfo();
519   MFI.setReturnAddressIsTaken(true);
520   MVT XLenVT = Subtarget.getXLenVT();
521   int XLenInBytes = Subtarget.getXLen() / 8;
522 
523   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
524     return SDValue();
525 
526   EVT VT = Op.getValueType();
527   SDLoc DL(Op);
528   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
529   if (Depth) {
530     int Off = -XLenInBytes;
531     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
532     SDValue Offset = DAG.getConstant(Off, DL, VT);
533     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
534                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
535                        MachinePointerInfo());
536   }
537 
538   // Return the value of the return address register, marking it an implicit
539   // live-in.
540   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
541   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
542 }
543 
544 // Returns the opcode of the target-specific SDNode that implements the 32-bit
545 // form of the given Opcode.
546 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
547   switch (Opcode) {
548   default:
549     llvm_unreachable("Unexpected opcode");
550   case ISD::SHL:
551     return RISCVISD::SLLW;
552   case ISD::SRA:
553     return RISCVISD::SRAW;
554   case ISD::SRL:
555     return RISCVISD::SRLW;
556   case ISD::SDIV:
557     return RISCVISD::DIVW;
558   case ISD::UDIV:
559     return RISCVISD::DIVUW;
560   case ISD::UREM:
561     return RISCVISD::REMUW;
562   }
563 }
564 
565 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
566 // Because i32 isn't a legal type for RV64, these operations would otherwise
567 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
568 // later one because the fact the operation was originally of type i32 is
569 // lost.
570 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
571   SDLoc DL(N);
572   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
573   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
574   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
575   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
576   // ReplaceNodeResults requires we maintain the same type for the return value.
577   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
578 }
579 
580 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
581                                              SmallVectorImpl<SDValue> &Results,
582                                              SelectionDAG &DAG) const {
583   SDLoc DL(N);
584   switch (N->getOpcode()) {
585   default:
586     llvm_unreachable("Don't know how to custom type legalize this operation!");
587   case ISD::SHL:
588   case ISD::SRA:
589   case ISD::SRL:
590     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
591            "Unexpected custom legalisation");
592     if (N->getOperand(1).getOpcode() == ISD::Constant)
593       return;
594     Results.push_back(customLegalizeToWOp(N, DAG));
595     break;
596   case ISD::SDIV:
597   case ISD::UDIV:
598   case ISD::UREM:
599     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
600            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
601     if (N->getOperand(0).getOpcode() == ISD::Constant ||
602         N->getOperand(1).getOpcode() == ISD::Constant)
603       return;
604     Results.push_back(customLegalizeToWOp(N, DAG));
605     break;
606   case ISD::BITCAST: {
607     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
608            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
609     SDLoc DL(N);
610     SDValue Op0 = N->getOperand(0);
611     if (Op0.getValueType() != MVT::f32)
612       return;
613     SDValue FPConv =
614         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
615     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
616     break;
617   }
618   }
619 }
620 
621 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
622                                                DAGCombinerInfo &DCI) const {
623   SelectionDAG &DAG = DCI.DAG;
624 
625   switch (N->getOpcode()) {
626   default:
627     break;
628   case RISCVISD::SplitF64: {
629     SDValue Op0 = N->getOperand(0);
630     // If the input to SplitF64 is just BuildPairF64 then the operation is
631     // redundant. Instead, use BuildPairF64's operands directly.
632     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
633       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
634 
635     SDLoc DL(N);
636     // This is a target-specific version of a DAGCombine performed in
637     // DAGCombiner::visitBITCAST. It performs the equivalent of:
638     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
639     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
640     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
641         !Op0.getNode()->hasOneUse())
642       break;
643     SDValue NewSplitF64 =
644         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
645                     Op0.getOperand(0));
646     SDValue Lo = NewSplitF64.getValue(0);
647     SDValue Hi = NewSplitF64.getValue(1);
648     APInt SignBit = APInt::getSignMask(32);
649     if (Op0.getOpcode() == ISD::FNEG) {
650       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
651                                   DAG.getConstant(SignBit, DL, MVT::i32));
652       return DCI.CombineTo(N, Lo, NewHi);
653     }
654     assert(Op0.getOpcode() == ISD::FABS);
655     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
656                                 DAG.getConstant(~SignBit, DL, MVT::i32));
657     return DCI.CombineTo(N, Lo, NewHi);
658   }
659   case RISCVISD::SLLW:
660   case RISCVISD::SRAW:
661   case RISCVISD::SRLW: {
662     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
663     SDValue LHS = N->getOperand(0);
664     SDValue RHS = N->getOperand(1);
665     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
666     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
667     if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
668         (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
669       return SDValue();
670     break;
671   }
672   case RISCVISD::FMV_X_ANYEXTW_RV64: {
673     SDLoc DL(N);
674     SDValue Op0 = N->getOperand(0);
675     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
676     // conversion is unnecessary and can be replaced with an ANY_EXTEND
677     // of the FMV_W_X_RV64 operand.
678     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
679       SDValue AExtOp =
680           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
681       return DCI.CombineTo(N, AExtOp);
682     }
683 
684     // This is a target-specific version of a DAGCombine performed in
685     // DAGCombiner::visitBITCAST. It performs the equivalent of:
686     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
687     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
688     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
689         !Op0.getNode()->hasOneUse())
690       break;
691     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
692                                  Op0.getOperand(0));
693     APInt SignBit = APInt::getSignMask(32).sext(64);
694     if (Op0.getOpcode() == ISD::FNEG) {
695       return DCI.CombineTo(N,
696                            DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
697                                        DAG.getConstant(SignBit, DL, MVT::i64)));
698     }
699     assert(Op0.getOpcode() == ISD::FABS);
700     return DCI.CombineTo(N,
701                          DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
702                                      DAG.getConstant(~SignBit, DL, MVT::i64)));
703   }
704   }
705 
706   return SDValue();
707 }
708 
709 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
710     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
711     unsigned Depth) const {
712   switch (Op.getOpcode()) {
713   default:
714     break;
715   case RISCVISD::SLLW:
716   case RISCVISD::SRAW:
717   case RISCVISD::SRLW:
718   case RISCVISD::DIVW:
719   case RISCVISD::DIVUW:
720   case RISCVISD::REMUW:
721     // TODO: As the result is sign-extended, this is conservatively correct. A
722     // more precise answer could be calculated for SRAW depending on known
723     // bits in the shift amount.
724     return 33;
725   }
726 
727   return 1;
728 }
729 
730 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
731                                              MachineBasicBlock *BB) {
732   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
733 
734   MachineFunction &MF = *BB->getParent();
735   DebugLoc DL = MI.getDebugLoc();
736   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
737   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
738   unsigned LoReg = MI.getOperand(0).getReg();
739   unsigned HiReg = MI.getOperand(1).getReg();
740   unsigned SrcReg = MI.getOperand(2).getReg();
741   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
742   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
743 
744   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
745                           RI);
746   MachineMemOperand *MMO =
747       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
748                               MachineMemOperand::MOLoad, 8, 8);
749   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
750       .addFrameIndex(FI)
751       .addImm(0)
752       .addMemOperand(MMO);
753   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
754       .addFrameIndex(FI)
755       .addImm(4)
756       .addMemOperand(MMO);
757   MI.eraseFromParent(); // The pseudo instruction is gone now.
758   return BB;
759 }
760 
761 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
762                                                  MachineBasicBlock *BB) {
763   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
764          "Unexpected instruction");
765 
766   MachineFunction &MF = *BB->getParent();
767   DebugLoc DL = MI.getDebugLoc();
768   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
769   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
770   unsigned DstReg = MI.getOperand(0).getReg();
771   unsigned LoReg = MI.getOperand(1).getReg();
772   unsigned HiReg = MI.getOperand(2).getReg();
773   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
774   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
775 
776   MachineMemOperand *MMO =
777       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
778                               MachineMemOperand::MOStore, 8, 8);
779   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
780       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
781       .addFrameIndex(FI)
782       .addImm(0)
783       .addMemOperand(MMO);
784   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
785       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
786       .addFrameIndex(FI)
787       .addImm(4)
788       .addMemOperand(MMO);
789   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
790   MI.eraseFromParent(); // The pseudo instruction is gone now.
791   return BB;
792 }
793 
794 static bool isSelectPseudo(MachineInstr &MI) {
795   switch (MI.getOpcode()) {
796   default:
797     return false;
798   case RISCV::Select_GPR_Using_CC_GPR:
799   case RISCV::Select_FPR32_Using_CC_GPR:
800   case RISCV::Select_FPR64_Using_CC_GPR:
801     return true;
802   }
803 }
804 
/// Expand a Select_*_Using_CC_GPR pseudo, together with any compatible select
/// pseudos that follow it in the same block, into a conditional branch and a
/// set of PHI nodes.
///
/// Operand layout of a select pseudo as used here: 0 = result, 1 = LHS,
/// 2 = RHS, 3 = condition code (immediate), 4 = value when the branch is
/// taken, 5 = value when it is not.
///
/// \param MI the first select pseudo of the sequence.
/// \param BB the block containing \p MI; it becomes HeadMBB.
/// \returns the newly created TailMBB, which holds the PHIs followed by every
///          instruction that came after the last select of the sequence.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  unsigned LHS = MI.getOperand(1).getReg();
  unsigned RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  // Debug values attached to the selects, and the set of registers defined by
  // the selects accepted into the sequence so far.
  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<unsigned, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI (inclusive) to find the last select that can share
  // this branch. The scan stops at the first instruction that violates the
  // requirements listed above.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // A select joins the sequence only if it tests the same condition and
      // neither of its value operands is produced by an earlier select.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // Any other instruction may stay interleaved only if it has no
      // unmodeled side effects, does not touch memory, and does not read a
      // register defined by one of the selects.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  // Position immediately after BB in the function's block list; the new
  // blocks are inserted there.
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  // Resulting layout order: HeadMBB, IfFalseMBB, TailMBB.
  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  // The branch is appended to HeadMBB and targets TailMBB directly when taken,
  // skipping IfFalseMBB.
  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    // Fetch the successor first: the current instruction may be erased below.
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  return TailMBB;
}
926 
927 MachineBasicBlock *
928 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
929                                                  MachineBasicBlock *BB) const {
930   switch (MI.getOpcode()) {
931   default:
932     llvm_unreachable("Unexpected instr type to insert");
933   case RISCV::Select_GPR_Using_CC_GPR:
934   case RISCV::Select_FPR32_Using_CC_GPR:
935   case RISCV::Select_FPR64_Using_CC_GPR:
936     return emitSelectPseudo(MI, BB);
937   case RISCV::BuildPairF64Pseudo:
938     return emitBuildPairF64Pseudo(MI, BB);
939   case RISCV::SplitF64Pseudo:
940     return emitSplitF64Pseudo(MI, BB);
941   }
942 }
943 
944 // Calling Convention Implementation.
945 // The expectations for frontend ABI lowering vary from target to target.
946 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
947 // details, but this is a longer term goal. For now, we simply try to keep the
948 // role of the frontend as simple and well-defined as possible. The rules can
949 // be summarised as:
950 // * Never split up large scalar arguments. We handle them here.
951 // * If a hardfloat calling convention is being used, and the struct may be
952 // passed in a pair of registers (fp+fp, int+fp), and both registers are
953 // available, then pass as two separate arguments. If either the GPRs or FPRs
954 // are exhausted, then pass according to the rule below.
955 // * If a struct could never be passed in registers or directly in a stack
956 // slot (as it is larger than 2*XLEN and the floating point rules don't
957 // apply), then pass it using a pointer with the byval attribute.
958 // * If a struct is less than 2*XLEN, then coerce to either a two-element
959 // word-sized array or a 2*XLEN scalar (depending on alignment).
960 // * The frontend can determine whether a struct is returned by reference or
961 // not based on its size and fields. If it will be returned by reference, the
962 // frontend must modify the prototype so a pointer with the sret annotation is
963 // passed as the first argument. This is not necessary for large scalar
964 // returns.
965 // * Struct return values and varargs should be coerced to structs containing
966 // register-size fields in the same situations they would be for fixed
967 // arguments.
968 
// The general-purpose registers (X10-X17) that the calling-convention code
// in this file requests through State.AllocateReg(ArgGPRs) when assigning
// argument and return-value locations.
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
973 
974 // Pass a 2*XLEN argument that has been split into two XLEN values through
975 // registers or the stack as necessary.
976 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
977                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
978                                 MVT ValVT2, MVT LocVT2,
979                                 ISD::ArgFlagsTy ArgFlags2) {
980   unsigned XLenInBytes = XLen / 8;
981   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
982     // At least one half can be passed via register.
983     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
984                                      VA1.getLocVT(), CCValAssign::Full));
985   } else {
986     // Both halves must be passed on the stack, with proper alignment.
987     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
988     State.addLoc(
989         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
990                             State.AllocateStack(XLenInBytes, StackAlign),
991                             VA1.getLocVT(), CCValAssign::Full));
992     State.addLoc(CCValAssign::getMem(
993         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
994         CCValAssign::Full));
995     return false;
996   }
997 
998   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
999     // The second half can also be passed via register.
1000     State.addLoc(
1001         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1002   } else {
1003     // The second half is passed via the stack, without additional alignment.
1004     State.addLoc(CCValAssign::getMem(
1005         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1006         CCValAssign::Full));
1007   }
1008 
1009   return false;
1010 }
1011 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// Assigns a location to value number ValNo and records it in State. Parts of
// a split value are first collected in State's pending-location list and are
// only committed once the final part (ArgFlags.isSplitEnd()) is seen.
//
// Parameters:
//   DL       - supplies XLEN (the largest legal integer width) and the
//              allocation size of OrigTy.
//   ValNo    - index of the value being assigned.
//   ValVT    - type of the value being assigned.
//   LocVT    - initial location type; overridden below for floating-point
//              and split values.
//   LocInfo  - initial conversion kind; overridden alongside LocVT.
//   ArgFlags - flags of the value (split markers, original alignment).
//   State    - calling-convention state that is updated.
//   IsFixed  - false for a variadic argument; enables the register alignment
//              rule below.
//   IsRet    - true when assigning return values; at most two values
//              (ValNo 0 and 1) are accepted in that case.
//   OrigTy   - original IR type; only consulted for variadic arguments.
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // Floating-point values that fit in XLEN bits are assigned an integer
  // location type and marked as needing a bit-conversion.
  if (ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI.
  if (XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPR left: the whole f64 goes in one 8-byte aligned stack slot.
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve the location of the second half (another GPR, or 4 bytes of
    // stack). Only a single location, naming the first register, is recorded
    // for the value.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    // Wait for the remaining parts before deciding how to pass the value.
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // Every part receives the same register or stack offset.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
1142 
1143 void RISCVTargetLowering::analyzeInputArgs(
1144     MachineFunction &MF, CCState &CCInfo,
1145     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1146   unsigned NumArgs = Ins.size();
1147   FunctionType *FType = MF.getFunction().getFunctionType();
1148 
1149   for (unsigned i = 0; i != NumArgs; ++i) {
1150     MVT ArgVT = Ins[i].VT;
1151     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1152 
1153     Type *ArgTy = nullptr;
1154     if (IsRet)
1155       ArgTy = FType->getReturnType();
1156     else if (Ins[i].isOrigArg())
1157       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1158 
1159     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1160                  ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
1161       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1162                         << EVT(ArgVT).getEVTString() << '\n');
1163       llvm_unreachable(nullptr);
1164     }
1165   }
1166 }
1167 
1168 void RISCVTargetLowering::analyzeOutputArgs(
1169     MachineFunction &MF, CCState &CCInfo,
1170     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1171     CallLoweringInfo *CLI) const {
1172   unsigned NumArgs = Outs.size();
1173 
1174   for (unsigned i = 0; i != NumArgs; i++) {
1175     MVT ArgVT = Outs[i].VT;
1176     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1177     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1178 
1179     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1180                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1181       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1182                         << EVT(ArgVT).getEVTString() << "\n");
1183       llvm_unreachable(nullptr);
1184     }
1185   }
1186 }
1187 
1188 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1189 // values.
1190 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1191                                    const CCValAssign &VA, const SDLoc &DL) {
1192   switch (VA.getLocInfo()) {
1193   default:
1194     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1195   case CCValAssign::Full:
1196     break;
1197   case CCValAssign::BCvt:
1198     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1199       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1200       break;
1201     }
1202     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1203     break;
1204   }
1205   return Val;
1206 }
1207 
1208 // The caller is responsible for loading the full value if the argument is
1209 // passed with CCValAssign::Indirect.
1210 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1211                                 const CCValAssign &VA, const SDLoc &DL) {
1212   MachineFunction &MF = DAG.getMachineFunction();
1213   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1214   EVT LocVT = VA.getLocVT();
1215   SDValue Val;
1216 
1217   unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1218   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1219   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1220 
1221   if (VA.getLocInfo() == CCValAssign::Indirect)
1222     return Val;
1223 
1224   return convertLocVTToValVT(DAG, Val, VA, DL);
1225 }
1226 
1227 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1228                                    const CCValAssign &VA, const SDLoc &DL) {
1229   EVT LocVT = VA.getLocVT();
1230 
1231   switch (VA.getLocInfo()) {
1232   default:
1233     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1234   case CCValAssign::Full:
1235     break;
1236   case CCValAssign::BCvt:
1237     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1238       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1239       break;
1240     }
1241     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1242     break;
1243   }
1244   return Val;
1245 }
1246 
1247 // The caller is responsible for loading the full value if the argument is
1248 // passed with CCValAssign::Indirect.
1249 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1250                                 const CCValAssign &VA, const SDLoc &DL) {
1251   MachineFunction &MF = DAG.getMachineFunction();
1252   MachineFrameInfo &MFI = MF.getFrameInfo();
1253   EVT LocVT = VA.getLocVT();
1254   EVT ValVT = VA.getValVT();
1255   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1256   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1257                                  VA.getLocMemOffset(), /*Immutable=*/true);
1258   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1259   SDValue Val;
1260 
1261   ISD::LoadExtType ExtType;
1262   switch (VA.getLocInfo()) {
1263   default:
1264     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1265   case CCValAssign::Full:
1266   case CCValAssign::Indirect:
1267   case CCValAssign::BCvt:
1268     ExtType = ISD::NON_EXTLOAD;
1269     break;
1270   }
1271   Val = DAG.getExtLoad(
1272       ExtType, DL, LocVT, Chain, FIN,
1273       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1274   return Val;
1275 }
1276 
1277 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1278                                        const CCValAssign &VA, const SDLoc &DL) {
1279   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1280          "Unexpected VA");
1281   MachineFunction &MF = DAG.getMachineFunction();
1282   MachineFrameInfo &MFI = MF.getFrameInfo();
1283   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1284 
1285   if (VA.isMemLoc()) {
1286     // f64 is passed on the stack.
1287     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1288     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1289     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1290                        MachinePointerInfo::getFixedStack(MF, FI));
1291   }
1292 
1293   assert(VA.isRegLoc() && "Expected register VA assignment");
1294 
1295   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1296   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1297   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1298   SDValue Hi;
1299   if (VA.getLocReg() == RISCV::X17) {
1300     // Second half of f64 is passed on the stack.
1301     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1302     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1303     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1304                      MachinePointerInfo::getFixedStack(MF, FI));
1305   } else {
1306     // Second half of f64 is passed in another GPR.
1307     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1308     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1309     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1310   }
1311   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1312 }
1313 
// Transform physical registers into virtual registers.
//
// Lowers the incoming arguments of the current function: a location is
// computed for every entry of Ins, one SDValue per entry is appended to
// InVals, and, for variadic functions, the argument registers that were not
// assigned to a fixed argument are stored to a save area. Returns the chain,
// extended with those stores when there are any.
//
// Only the C and Fast calling conventions are accepted. Functions carrying
// the "interrupt" attribute must take no arguments and must name one of the
// kinds "user", "supervisor" or "machine".
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  // Validate the "interrupt" attribute, if present.
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      // Consume the remaining parts of the same original argument, loading
      // each one from the base address plus its part offset.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    // Index of the first argument register not used by a fixed argument.
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      // One XLEN-sized slot per unallocated register, at negative offsets.
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR value attached to the store's memory operand.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
1454 
1455 /// isEligibleForTailCallOptimization - Check whether the call is eligible
1456 /// for tail call optimization.
1457 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1458 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
1459     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1460     const SmallVector<CCValAssign, 16> &ArgLocs) const {
1461 
1462   auto &Callee = CLI.Callee;
1463   auto CalleeCC = CLI.CallConv;
1464   auto IsVarArg = CLI.IsVarArg;
1465   auto &Outs = CLI.Outs;
1466   auto &Caller = MF.getFunction();
1467   auto CallerCC = Caller.getCallingConv();
1468 
1469   // Do not tail call opt functions with "disable-tail-calls" attribute.
1470   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1471     return false;
1472 
1473   // Exception-handling functions need a special set of instructions to
1474   // indicate a return to the hardware. Tail-calling another function would
1475   // probably break this.
1476   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1477   // should be expanded as new function attributes are introduced.
1478   if (Caller.hasFnAttribute("interrupt"))
1479     return false;
1480 
1481   // Do not tail call opt functions with varargs.
1482   if (IsVarArg)
1483     return false;
1484 
1485   // Do not tail call opt if the stack is used to pass parameters.
1486   if (CCInfo.getNextStackOffset() != 0)
1487     return false;
1488 
1489   // Do not tail call opt if any parameters need to be passed indirectly.
1490   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1491   // passed indirectly. So the address of the value will be passed in a
1492   // register, or if not available, then the address is put on the stack. In
1493   // order to pass indirectly, space on the stack often needs to be allocated
1494   // in order to store the value. In this case the CCInfo.getNextStackOffset()
1495   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
1496   // are passed CCValAssign::Indirect.
1497   for (auto &VA : ArgLocs)
1498     if (VA.getLocInfo() == CCValAssign::Indirect)
1499       return false;
1500 
1501   // Do not tail call opt if either caller or callee uses struct return
1502   // semantics.
1503   auto IsCallerStructRet = Caller.hasStructRetAttr();
1504   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1505   if (IsCallerStructRet || IsCalleeStructRet)
1506     return false;
1507 
1508   // Externally-defined functions with weak linkage should not be
1509   // tail-called. The behaviour of branch instructions in this situation (as
1510   // used for tail calls) is implementation-defined, so we cannot rely on the
1511   // linker replacing the tail call with a return.
1512   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1513     const GlobalValue *GV = G->getGlobal();
1514     if (GV->hasExternalWeakLinkage())
1515       return false;
1516   }
1517 
1518   // The callee has to preserve all registers the caller needs to preserve.
1519   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1520   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1521   if (CalleeCC != CallerCC) {
1522     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1523     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1524       return false;
1525   }
1526 
1527   // Byval parameters hand the function a pointer directly into the stack area
1528   // we want to reuse during a tail call. Working around this *is* possible
1529   // but less efficient and uglier in LowerCall.
1530   for (auto &Arg : Outs)
1531     if (Arg.Flags.isByVal())
1532       return false;
1533 
1534   return true;
1535 }
1536 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
//
// CLI describes the call site (callee, calling convention, outgoing argument
// descriptors and values, expected results). For an ordinary call the values
// returned by the callee are appended to InVals and the updated chain is
// returned. When the call is emitted as a tail call, the RISCVISD::TAIL node
// is returned directly and InVals is left untouched.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  // Bound by reference: clearing it below also updates CLI.IsTailCall.
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  // Count successful tail calls; a musttail call site that could not be
  // turned into a tail call is a fatal error.
  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  // Each byval argument is copied with a memcpy into a fresh stack object;
  // the frame-index pointers are collected in argument order so the main
  // loop below can substitute them one by one.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // Only ordinary calls are bracketed by callseq_start/callseq_end.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  // `i` walks ArgLocs/Outs/OutVals in lock-step (and is advanced inside the
  // loop body for split indirect arguments); `j` indexes ByValArgs.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves; the low half always goes in the
      // assigned register.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        // The high half is stored at the stack pointer itself (no offset is
        // added). The copy from X2 is created once and reused.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        // NOTE(review): RegLo + 1 presumably relies on the GPR enum values
        // being numbered consecutively - confirm against the register
        // definitions.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      // Consume every following part of the same original argument, storing
      // each at its PartOffset within the spill slot. This advances the outer
      // loop index, so those parts are not visited again.
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  // Tail calls get no register-mask operand.
  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // A tail call ends lowering here: no callseq_end is emitted and no return
  // values are copied out.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // An f64 assigned to an i32 location comes back as two halves: the first
    // in ArgGPRs[0] (copied above), the second in ArgGPRs[1]. They are
    // recombined with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
1785 
1786 bool RISCVTargetLowering::CanLowerReturn(
1787     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1788     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1789   SmallVector<CCValAssign, 16> RVLocs;
1790   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1791   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1792     MVT VT = Outs[i].VT;
1793     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1794     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
1795                  CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
1796       return false;
1797   }
1798   return true;
1799 }
1800 
1801 SDValue
1802 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1803                                  bool IsVarArg,
1804                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
1805                                  const SmallVectorImpl<SDValue> &OutVals,
1806                                  const SDLoc &DL, SelectionDAG &DAG) const {
1807   // Stores the assignment of the return value to a location.
1808   SmallVector<CCValAssign, 16> RVLocs;
1809 
1810   // Info about the registers and stack slot.
1811   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
1812                  *DAG.getContext());
1813 
1814   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
1815                     nullptr);
1816 
1817   SDValue Glue;
1818   SmallVector<SDValue, 4> RetOps(1, Chain);
1819 
1820   // Copy the result values into the output registers.
1821   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
1822     SDValue Val = OutVals[i];
1823     CCValAssign &VA = RVLocs[i];
1824     assert(VA.isRegLoc() && "Can only return in registers!");
1825 
1826     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1827       // Handle returning f64 on RV32D with a soft float ABI.
1828       assert(VA.isRegLoc() && "Expected return via registers");
1829       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
1830                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
1831       SDValue Lo = SplitF64.getValue(0);
1832       SDValue Hi = SplitF64.getValue(1);
1833       unsigned RegLo = VA.getLocReg();
1834       unsigned RegHi = RegLo + 1;
1835       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
1836       Glue = Chain.getValue(1);
1837       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
1838       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
1839       Glue = Chain.getValue(1);
1840       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
1841     } else {
1842       // Handle a 'normal' return.
1843       Val = convertValVTToLocVT(DAG, Val, VA, DL);
1844       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
1845 
1846       // Guarantee that all emitted copies are stuck together.
1847       Glue = Chain.getValue(1);
1848       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1849     }
1850   }
1851 
1852   RetOps[0] = Chain; // Update chain.
1853 
1854   // Add the glue node if we have it.
1855   if (Glue.getNode()) {
1856     RetOps.push_back(Glue);
1857   }
1858 
1859   // Interrupt service routines use different return instructions.
1860   const Function &Func = DAG.getMachineFunction().getFunction();
1861   if (Func.hasFnAttribute("interrupt")) {
1862     if (!Func.getReturnType()->isVoidTy())
1863       report_fatal_error(
1864           "Functions with the interrupt attribute must have void return type!");
1865 
1866     MachineFunction &MF = DAG.getMachineFunction();
1867     StringRef Kind =
1868       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1869 
1870     unsigned RetOpc;
1871     if (Kind == "user")
1872       RetOpc = RISCVISD::URET_FLAG;
1873     else if (Kind == "supervisor")
1874       RetOpc = RISCVISD::SRET_FLAG;
1875     else
1876       RetOpc = RISCVISD::MRET_FLAG;
1877 
1878     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
1879   }
1880 
1881   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
1882 }
1883 
1884 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1885   switch ((RISCVISD::NodeType)Opcode) {
1886   case RISCVISD::FIRST_NUMBER:
1887     break;
1888   case RISCVISD::RET_FLAG:
1889     return "RISCVISD::RET_FLAG";
1890   case RISCVISD::URET_FLAG:
1891     return "RISCVISD::URET_FLAG";
1892   case RISCVISD::SRET_FLAG:
1893     return "RISCVISD::SRET_FLAG";
1894   case RISCVISD::MRET_FLAG:
1895     return "RISCVISD::MRET_FLAG";
1896   case RISCVISD::CALL:
1897     return "RISCVISD::CALL";
1898   case RISCVISD::SELECT_CC:
1899     return "RISCVISD::SELECT_CC";
1900   case RISCVISD::BuildPairF64:
1901     return "RISCVISD::BuildPairF64";
1902   case RISCVISD::SplitF64:
1903     return "RISCVISD::SplitF64";
1904   case RISCVISD::TAIL:
1905     return "RISCVISD::TAIL";
1906   case RISCVISD::SLLW:
1907     return "RISCVISD::SLLW";
1908   case RISCVISD::SRAW:
1909     return "RISCVISD::SRAW";
1910   case RISCVISD::SRLW:
1911     return "RISCVISD::SRLW";
1912   case RISCVISD::DIVW:
1913     return "RISCVISD::DIVW";
1914   case RISCVISD::DIVUW:
1915     return "RISCVISD::DIVUW";
1916   case RISCVISD::REMUW:
1917     return "RISCVISD::REMUW";
1918   case RISCVISD::FMV_W_X_RV64:
1919     return "RISCVISD::FMV_W_X_RV64";
1920   case RISCVISD::FMV_X_ANYEXTW_RV64:
1921     return "RISCVISD::FMV_X_ANYEXTW_RV64";
1922   }
1923   return nullptr;
1924 }
1925 
1926 std::pair<unsigned, const TargetRegisterClass *>
1927 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1928                                                   StringRef Constraint,
1929                                                   MVT VT) const {
1930   // First, see if this is a constraint that directly corresponds to a
1931   // RISCV register class.
1932   if (Constraint.size() == 1) {
1933     switch (Constraint[0]) {
1934     case 'r':
1935       return std::make_pair(0U, &RISCV::GPRRegClass);
1936     default:
1937       break;
1938     }
1939   }
1940 
1941   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1942 }
1943 
1944 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
1945                                                    Instruction *Inst,
1946                                                    AtomicOrdering Ord) const {
1947   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
1948     return Builder.CreateFence(Ord);
1949   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
1950     return Builder.CreateFence(AtomicOrdering::Release);
1951   return nullptr;
1952 }
1953 
1954 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
1955                                                     Instruction *Inst,
1956                                                     AtomicOrdering Ord) const {
1957   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
1958     return Builder.CreateFence(AtomicOrdering::Acquire);
1959   return nullptr;
1960 }
1961 
1962 TargetLowering::AtomicExpansionKind
1963 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1964   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
1965   // point operations can't be used in an lr/sc sequence without breaking the
1966   // forward-progress guarantee.
1967   if (AI->isFloatingPointOperation())
1968     return AtomicExpansionKind::CmpXChg;
1969 
1970   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
1971   if (Size == 8 || Size == 16)
1972     return AtomicExpansionKind::MaskedIntrinsic;
1973   return AtomicExpansionKind::None;
1974 }
1975 
1976 static Intrinsic::ID
1977 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
1978   if (XLen == 32) {
1979     switch (BinOp) {
1980     default:
1981       llvm_unreachable("Unexpected AtomicRMW BinOp");
1982     case AtomicRMWInst::Xchg:
1983       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
1984     case AtomicRMWInst::Add:
1985       return Intrinsic::riscv_masked_atomicrmw_add_i32;
1986     case AtomicRMWInst::Sub:
1987       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
1988     case AtomicRMWInst::Nand:
1989       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
1990     case AtomicRMWInst::Max:
1991       return Intrinsic::riscv_masked_atomicrmw_max_i32;
1992     case AtomicRMWInst::Min:
1993       return Intrinsic::riscv_masked_atomicrmw_min_i32;
1994     case AtomicRMWInst::UMax:
1995       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
1996     case AtomicRMWInst::UMin:
1997       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
1998     }
1999   }
2000 
2001   if (XLen == 64) {
2002     switch (BinOp) {
2003     default:
2004       llvm_unreachable("Unexpected AtomicRMW BinOp");
2005     case AtomicRMWInst::Xchg:
2006       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
2007     case AtomicRMWInst::Add:
2008       return Intrinsic::riscv_masked_atomicrmw_add_i64;
2009     case AtomicRMWInst::Sub:
2010       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
2011     case AtomicRMWInst::Nand:
2012       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
2013     case AtomicRMWInst::Max:
2014       return Intrinsic::riscv_masked_atomicrmw_max_i64;
2015     case AtomicRMWInst::Min:
2016       return Intrinsic::riscv_masked_atomicrmw_min_i64;
2017     case AtomicRMWInst::UMax:
2018       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
2019     case AtomicRMWInst::UMin:
2020       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
2021     }
2022   }
2023 
2024   llvm_unreachable("Unexpected XLen\n");
2025 }
2026 
2027 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
2028     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2029     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2030   unsigned XLen = Subtarget.getXLen();
2031   Value *Ordering =
2032       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
2033   Type *Tys[] = {AlignedAddr->getType()};
2034   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
2035       AI->getModule(),
2036       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
2037 
2038   if (XLen == 64) {
2039     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2040     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2041     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2042   }
2043 
2044   Value *Result;
2045 
2046   // Must pass the shift amount needed to sign extend the loaded value prior
2047   // to performing a signed comparison for min/max. ShiftAmt is the number of
2048   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
2049   // is the number of bits to left+right shift the value in order to
2050   // sign-extend.
2051   if (AI->getOperation() == AtomicRMWInst::Min ||
2052       AI->getOperation() == AtomicRMWInst::Max) {
2053     const DataLayout &DL = AI->getModule()->getDataLayout();
2054     unsigned ValWidth =
2055         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2056     Value *SextShamt =
2057         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
2058     Result = Builder.CreateCall(LrwOpScwLoop,
2059                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2060   } else {
2061     Result =
2062         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2063   }
2064 
2065   if (XLen == 64)
2066     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2067   return Result;
2068 }
2069 
2070 TargetLowering::AtomicExpansionKind
2071 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
2072     AtomicCmpXchgInst *CI) const {
2073   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2074   if (Size == 8 || Size == 16)
2075     return AtomicExpansionKind::MaskedIntrinsic;
2076   return AtomicExpansionKind::None;
2077 }
2078 
2079 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2080     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2081     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2082   unsigned XLen = Subtarget.getXLen();
2083   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
2084   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
2085   if (XLen == 64) {
2086     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2087     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2088     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2089     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
2090   }
2091   Type *Tys[] = {AlignedAddr->getType()};
2092   Function *MaskedCmpXchg =
2093       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2094   Value *Result = Builder.CreateCall(
2095       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2096   if (XLen == 64)
2097     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2098   return Result;
2099 }
2100