//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64)
    report_fatal_error("Don't know how to lower this ABI");

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
      ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
      ISD::SETGT,  ISD::SETGE,  ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments (log2).
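  // Note these are log2 values: with the compressed extension (C), functions
  // need only be 2-byte aligned; otherwise 4-byte aligned.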
  unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 4;
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
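// For example, (setgt lhs, rhs) becomes (setlt rhs, lhs); only eq, ne, lt,
// ge, ltu and geu have directly corresponding branch instructions (see
// getBranchOpcodeForIntCondCode below).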
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BITCAST: {
    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
      return SDValue();
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
    return FPConv;
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = N->getGlobal();
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerGlobalAddress");
  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
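  // For example, accesses to @g+4 and @g+8 can then share the LUI+ADDI
  // materialisation of @g itself.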
  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
  SDValue MNLo =
    SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
                       DAG.getConstant(Offset, DL, XLenVT));
  return MNLo;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = N->getBlockAddress();
  int64_t Offset = N->getOffset();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerBlockAddress");

  SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
  SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
  SDValue MNLo =
    SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
  return MNLo;
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
  const Constant *CPA = N->getConstVal();
  int64_t Offset = N->getOffset();
  unsigned Alignment = N->getAlignment();

  if (!isPositionIndependent()) {
    SDValue CPAHi =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
    SDValue CPALo =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
    SDValue MNLo =
        SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
    return MNLo;
  } else {
    report_fatal_error("Unable to lowerConstantPool");
  }
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  unsigned FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact the operation was originally of type i32 is
// lost.
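// For example, on RV64 (i32 (srl x, y)) is lowered as
// (i32 (trunc (riscvisd::srlw (any_extend x), (any_extend y)))).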
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  }
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);
    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
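    // This lets SimplifyDemandedBits remove redundant masking of the shift
    // amount, e.g. the AND in (sllw x, (and y, 31)).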
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
      return SDValue();
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      SDValue AExtOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
      return DCI.CombineTo(N, AExtOp);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG) {
      return DCI.CombineTo(N,
                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                                       DAG.getConstant(SignBit, DL, MVT::i64)));
    }
    assert(Op0.getOpcode() == ISD::FABS);
    return DCI.CombineTo(N,
                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
  }
  }

  return SDValue();
}

unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
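    // The result of these nodes is sign-extended from bit 31, so bits 63
    // down to 31 are all copies of the sign bit: at least 33 sign bits.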
    return 33;
  }

  return 1;
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned LoReg = MI.getOperand(0).getReg();
  unsigned HiReg = MI.getOperand(1).getReg();
  unsigned SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned LoReg = MI.getOperand(1).getReg();
  unsigned HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" a SELECT instruction, we actually have to insert the triangle
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);
  // Move all remaining instructions to TailMBB.
  TailMBB->splice(TailMBB->begin(), HeadMBB,
                  std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned LHS = MI.getOperand(1).getReg();
  unsigned RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(4).getReg())
      .addMBB(HeadMBB)
      .addReg(MI.getOperand(5).getReg())
      .addMBB(IfFalseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return TailMBB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
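//
// For example, under the coercion rule above, an RV32 frontend would pass
// struct { int32_t a; int32_t b; } as [2 x i32], but a struct containing a
// single 8-byte-aligned int64_t as an i64.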

static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
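// Such arguments arise from, e.g., an i64 on RV32 or an i128 on RV64 that was
// split during legalisation.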
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
    return false;
  }

  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  if (ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
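  // For example, a variadic double on RV32 whose next free register would be
  // a1 is instead passed in the aligned pair a2/a3, skipping a1 entirely.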
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI.
  if (XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
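  // For example, an i128 on RV32 arrives here as four separate i32 values,
  // each marked as a piece of the original split argument.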
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;

  unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}

static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
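      // Clear the pointer info attached to the store: the vararg save slots
      // are not backed by a distinct IR object. (This mirrors what other
      // backends' vararg lowering does for their register save areas.)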
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto IsVarArg = CLI.IsVarArg;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt functions with varargs.
  if (IsVarArg)
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. The address of the value is then passed in a register,
  // or, if no register is free, on the stack. Passing a value indirectly
  // often requires allocating stack space to hold it, in which case the
  // CCInfo.getNextStackOffset() != 0 check is not enough: we must also check
  // whether any entry in ArgLocs is marked CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

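  // Tail calls reuse the caller's stack frame as-is, so no
  // callseq_start/callseq_end markers are emitted for them.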
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

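      // X17 (a7) is the last GPR argument register; if the low half was
      // assigned there, no GPR is left for the high half, which therefore
      // becomes the first stack-passed value, at offset zero from sp.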
      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so the direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

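    // An f64 returned on RV32 with a soft-float ABI comes back in the GPR
    // pair a0/a1: fetch the second half from a1 and rebuild the f64.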
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
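  // CC_RISCV returns true if it fails to assign a location. Any failure means
  // the values cannot all be returned in registers, and the return will be
  // demoted to go via an sret pointer instead.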
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

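    // The attribute value selects the privileged return instruction: uret
    // for user-level interrupts, sret for supervisor, and mret (machine
    // mode) otherwise.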
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  case RISCVISD::SLLW:
    return "RISCVISD::SLLW";
  case RISCVISD::SRAW:
    return "RISCVISD::SRAW";
  case RISCVISD::SRLW:
    return "RISCVISD::SRLW";
  case RISCVISD::DIVW:
    return "RISCVISD::DIVW";
  case RISCVISD::DIVUW:
    return "RISCVISD::DIVUW";
  case RISCVISD::REMUW:
    return "RISCVISD::REMUW";
  case RISCVISD::FMV_W_X_RV64:
    return "RISCVISD::FMV_W_X_RV64";
  case RISCVISD::FMV_X_ANYEXTW_RV64:
    return "RISCVISD::FMV_X_ANYEXTW_RV64";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

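// Atomic loads and stores are lowered to plain loads/stores bracketed by
// fences. The fences emitted below follow the fence-based mapping suggested
// for the RISC-V memory model: a full fence before a seq_cst load, a release
// fence before a releasing store, and an acquire fence after an acquiring
// load.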
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

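  // LR/SC only operate on 32- and 64-bit words, so i8/i16 atomicrmw must be
  // expanded to a masked, word-sized LR/SC loop via the intrinsics emitted in
  // emitMaskedAtomicRMWIntrinsic below.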
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

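  // The i64 intrinsic variants expect XLen-wide operands, but the masked
  // expansion produces i32 values; sign-extend them, matching how 32-bit
  // values are normally held in 64-bit registers on RISC-V.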
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
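  // For example, an i8 field at bit offset 16 on RV32 gives
  // 32 - 16 - 8 == 8: shifting left then arithmetic-shifting right by 8 bits
  // sign-extends the field in place.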
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
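  // As with the masked atomicrmw intrinsics, the i64 variant takes its i32
  // operands sign-extended to XLen.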
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
