1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/CodeGen/ValueTypes.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/DiagnosticPrinter.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "riscv-lower"
38 
39 STATISTIC(NumTailCalls, "Number of tail calls");
40 
41 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
42                                          const RISCVSubtarget &STI)
43     : TargetLowering(TM), Subtarget(STI) {
44 
45   MVT XLenVT = Subtarget.getXLenVT();
46 
47   // Set up the register classes.
48   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
49 
50   if (Subtarget.hasStdExtF())
51     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
52   if (Subtarget.hasStdExtD())
53     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
54 
55   // Compute derived properties from the register classes.
56   computeRegisterProperties(STI.getRegisterInfo());
57 
58   setStackPointerRegisterToSaveRestore(RISCV::X2);
59 
60   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
61     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
62 
63   // TODO: add all necessary setOperationAction calls.
64   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
65 
66   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
67   setOperationAction(ISD::BR_CC, XLenVT, Expand);
68   setOperationAction(ISD::SELECT, XLenVT, Custom);
69   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
70 
71   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
72   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
73 
74   setOperationAction(ISD::VASTART, MVT::Other, Custom);
75   setOperationAction(ISD::VAARG, MVT::Other, Expand);
76   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
77   setOperationAction(ISD::VAEND, MVT::Other, Expand);
78 
79   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
80     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
81 
82   if (Subtarget.is64Bit()) {
83     setOperationAction(ISD::SHL, MVT::i32, Custom);
84     setOperationAction(ISD::SRA, MVT::i32, Custom);
85     setOperationAction(ISD::SRL, MVT::i32, Custom);
86   }
87 
88   if (!Subtarget.hasStdExtM()) {
89     setOperationAction(ISD::MUL, XLenVT, Expand);
90     setOperationAction(ISD::MULHS, XLenVT, Expand);
91     setOperationAction(ISD::MULHU, XLenVT, Expand);
92     setOperationAction(ISD::SDIV, XLenVT, Expand);
93     setOperationAction(ISD::UDIV, XLenVT, Expand);
94     setOperationAction(ISD::SREM, XLenVT, Expand);
95     setOperationAction(ISD::UREM, XLenVT, Expand);
96   }
97 
98   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
99     setOperationAction(ISD::SDIV, MVT::i32, Custom);
100     setOperationAction(ISD::UDIV, MVT::i32, Custom);
101     setOperationAction(ISD::UREM, MVT::i32, Custom);
102   }
103 
104   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
105   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
106   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
107   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
108 
109   setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
110   setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
111   setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
112 
113   setOperationAction(ISD::ROTL, XLenVT, Expand);
114   setOperationAction(ISD::ROTR, XLenVT, Expand);
115   setOperationAction(ISD::BSWAP, XLenVT, Expand);
116   setOperationAction(ISD::CTTZ, XLenVT, Expand);
117   setOperationAction(ISD::CTLZ, XLenVT, Expand);
118   setOperationAction(ISD::CTPOP, XLenVT, Expand);
119 
120   ISD::CondCode FPCCToExtend[] = {
121       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
122       ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
123       ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
124 
125   ISD::NodeType FPOpToExtend[] = {
126       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
127 
128   if (Subtarget.hasStdExtF()) {
129     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
130     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
131     for (auto CC : FPCCToExtend)
132       setCondCodeAction(CC, MVT::f32, Expand);
133     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
134     setOperationAction(ISD::SELECT, MVT::f32, Custom);
135     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
136     for (auto Op : FPOpToExtend)
137       setOperationAction(Op, MVT::f32, Expand);
138   }
139 
140   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
141     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
142 
143   if (Subtarget.hasStdExtD()) {
144     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
145     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
146     for (auto CC : FPCCToExtend)
147       setCondCodeAction(CC, MVT::f64, Expand);
148     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
149     setOperationAction(ISD::SELECT, MVT::f64, Custom);
150     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
151     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
152     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
153     for (auto Op : FPOpToExtend)
154       setOperationAction(Op, MVT::f64, Expand);
155   }
156 
157   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
158   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
159   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
160 
161   if (Subtarget.hasStdExtA()) {
162     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
163     setMinCmpXchgSizeInBits(32);
164   } else {
165     setMaxAtomicSizeInBitsSupported(0);
166   }
167 
168   setBooleanContents(ZeroOrOneBooleanContent);
169 
170   // Function alignments (log2).
171   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
172   setMinFunctionAlignment(FunctionAlignment);
173   setPrefFunctionAlignment(FunctionAlignment);
174 
175   // Effectively disable jump table generation.
176   setMinimumJumpTableEntries(INT_MAX);
177 }
178 
179 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
180                                             EVT VT) const {
181   if (!VT.isVector())
182     return getPointerTy(DL);
183   return VT.changeVectorElementTypeToInteger();
184 }
185 
186 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
187                                              const CallInst &I,
188                                              MachineFunction &MF,
189                                              unsigned Intrinsic) const {
190   switch (Intrinsic) {
191   default:
192     return false;
193   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
194   case Intrinsic::riscv_masked_atomicrmw_add_i32:
195   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
196   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
197   case Intrinsic::riscv_masked_atomicrmw_max_i32:
198   case Intrinsic::riscv_masked_atomicrmw_min_i32:
199   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
200   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
201   case Intrinsic::riscv_masked_cmpxchg_i32:
202     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
203     Info.opc = ISD::INTRINSIC_W_CHAIN;
204     Info.memVT = MVT::getVT(PtrTy->getElementType());
205     Info.ptrVal = I.getArgOperand(0);
206     Info.offset = 0;
207     Info.align = 4;
208     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
209                  MachineMemOperand::MOVolatile;
210     return true;
211   }
212 }
213 
214 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
215                                                 const AddrMode &AM, Type *Ty,
216                                                 unsigned AS,
217                                                 Instruction *I) const {
218   // No global is ever allowed as a base.
219   if (AM.BaseGV)
220     return false;
221 
222   // Require a 12-bit signed offset.
223   if (!isInt<12>(AM.BaseOffs))
224     return false;
225 
226   switch (AM.Scale) {
227   case 0: // "r+i" or just "i", depending on HasBaseReg.
228     break;
229   case 1:
230     if (!AM.HasBaseReg) // allow "r+i".
231       break;
232     return false; // disallow "r+r" or "r+r+i".
233   default:
234     return false;
235   }
236 
237   return true;
238 }
239 
240 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
241   return isInt<12>(Imm);
242 }
243 
244 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
245   return isInt<12>(Imm);
246 }
247 
248 // On RV32, 64-bit integers are split into their high and low parts and held
249 // in two different registers, so the trunc is free since the low register can
250 // just be used.
251 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
252   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
253     return false;
254   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
255   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
256   return (SrcBits == 64 && DestBits == 32);
257 }
258 
259 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
260   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
261       !SrcVT.isInteger() || !DstVT.isInteger())
262     return false;
263   unsigned SrcBits = SrcVT.getSizeInBits();
264   unsigned DestBits = DstVT.getSizeInBits();
265   return (SrcBits == 64 && DestBits == 32);
266 }
267 
268 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
269   // Zexts are free if they can be combined with a load.
270   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
271     EVT MemVT = LD->getMemoryVT();
272     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
273          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
274         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
275          LD->getExtensionType() == ISD::ZEXTLOAD))
276       return true;
277   }
278 
279   return TargetLowering::isZExtFree(Val, VT2);
280 }
281 
282 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
283   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
284 }
285 
286 // Changes the condition code and swaps operands if necessary, so the SetCC
287 // operation matches one of the comparisons supported directly in the RISC-V
288 // ISA.
289 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
290   switch (CC) {
291   default:
292     break;
293   case ISD::SETGT:
294   case ISD::SETLE:
295   case ISD::SETUGT:
296   case ISD::SETULE:
297     CC = ISD::getSetCCSwappedOperands(CC);
298     std::swap(LHS, RHS);
299     break;
300   }
301 }
302 
303 // Return the RISC-V branch opcode that matches the given DAG integer
304 // condition code. The CondCode must be one of those supported by the RISC-V
305 // ISA (see normaliseSetCC).
306 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
307   switch (CC) {
308   default:
309     llvm_unreachable("Unsupported CondCode");
310   case ISD::SETEQ:
311     return RISCV::BEQ;
312   case ISD::SETNE:
313     return RISCV::BNE;
314   case ISD::SETLT:
315     return RISCV::BLT;
316   case ISD::SETGE:
317     return RISCV::BGE;
318   case ISD::SETULT:
319     return RISCV::BLTU;
320   case ISD::SETUGE:
321     return RISCV::BGEU;
322   }
323 }
324 
325 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
326                                             SelectionDAG &DAG) const {
327   switch (Op.getOpcode()) {
328   default:
329     report_fatal_error("unimplemented operand");
330   case ISD::GlobalAddress:
331     return lowerGlobalAddress(Op, DAG);
332   case ISD::BlockAddress:
333     return lowerBlockAddress(Op, DAG);
334   case ISD::ConstantPool:
335     return lowerConstantPool(Op, DAG);
336   case ISD::SELECT:
337     return lowerSELECT(Op, DAG);
338   case ISD::VASTART:
339     return lowerVASTART(Op, DAG);
340   case ISD::FRAMEADDR:
341     return lowerFRAMEADDR(Op, DAG);
342   case ISD::RETURNADDR:
343     return lowerRETURNADDR(Op, DAG);
344   case ISD::BITCAST: {
345     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
346            "Unexpected custom legalisation");
347     SDLoc DL(Op);
348     SDValue Op0 = Op.getOperand(0);
349     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
350       return SDValue();
351     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
352     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
353     return FPConv;
354   }
355   }
356 }
357 
358 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
359                                                 SelectionDAG &DAG) const {
360   SDLoc DL(Op);
361   EVT Ty = Op.getValueType();
362   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
363   const GlobalValue *GV = N->getGlobal();
364   int64_t Offset = N->getOffset();
365   MVT XLenVT = Subtarget.getXLenVT();
366 
367   if (isPositionIndependent())
368     report_fatal_error("Unable to lowerGlobalAddress");
369   // In order to maximise the opportunity for common subexpression elimination,
370   // emit a separate ADD node for the global address offset instead of folding
371   // it in the global address node. Later peephole optimisations may choose to
372   // fold it back in when profitable.
373   SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
374   SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
375   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
376   SDValue MNLo =
377     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
378   if (Offset != 0)
379     return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
380                        DAG.getConstant(Offset, DL, XLenVT));
381   return MNLo;
382 }
383 
384 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
385                                                SelectionDAG &DAG) const {
386   SDLoc DL(Op);
387   EVT Ty = Op.getValueType();
388   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
389   const BlockAddress *BA = N->getBlockAddress();
390   int64_t Offset = N->getOffset();
391 
392   if (isPositionIndependent())
393     report_fatal_error("Unable to lowerBlockAddress");
394 
395   SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
396   SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
397   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
398   SDValue MNLo =
399     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
400   return MNLo;
401 }
402 
403 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
404                                                SelectionDAG &DAG) const {
405   SDLoc DL(Op);
406   EVT Ty = Op.getValueType();
407   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
408   const Constant *CPA = N->getConstVal();
409   int64_t Offset = N->getOffset();
410   unsigned Alignment = N->getAlignment();
411 
412   if (!isPositionIndependent()) {
413     SDValue CPAHi =
414         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
415     SDValue CPALo =
416         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
417     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
418     SDValue MNLo =
419         SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
420     return MNLo;
421   } else {
422     report_fatal_error("Unable to lowerConstantPool");
423   }
424 }
425 
426 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
427   SDValue CondV = Op.getOperand(0);
428   SDValue TrueV = Op.getOperand(1);
429   SDValue FalseV = Op.getOperand(2);
430   SDLoc DL(Op);
431   MVT XLenVT = Subtarget.getXLenVT();
432 
433   // If the result type is XLenVT and CondV is the output of a SETCC node
434   // which also operated on XLenVT inputs, then merge the SETCC node into the
435   // lowered RISCVISD::SELECT_CC to take advantage of the integer
436   // compare+branch instructions. i.e.:
437   // (select (setcc lhs, rhs, cc), truev, falsev)
438   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
439   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
440       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
441     SDValue LHS = CondV.getOperand(0);
442     SDValue RHS = CondV.getOperand(1);
443     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
444     ISD::CondCode CCVal = CC->get();
445 
446     normaliseSetCC(LHS, RHS, CCVal);
447 
448     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
449     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
450     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
451     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
452   }
453 
454   // Otherwise:
455   // (select condv, truev, falsev)
456   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
457   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
458   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
459 
460   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
461   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
462 
463   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
464 }
465 
466 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
467   MachineFunction &MF = DAG.getMachineFunction();
468   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
469 
470   SDLoc DL(Op);
471   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
472                                  getPointerTy(MF.getDataLayout()));
473 
474   // vastart just stores the address of the VarArgsFrameIndex slot into the
475   // memory location argument.
476   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
477   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
478                       MachinePointerInfo(SV));
479 }
480 
481 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
482                                             SelectionDAG &DAG) const {
483   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
484   MachineFunction &MF = DAG.getMachineFunction();
485   MachineFrameInfo &MFI = MF.getFrameInfo();
486   MFI.setFrameAddressIsTaken(true);
487   unsigned FrameReg = RI.getFrameRegister(MF);
488   int XLenInBytes = Subtarget.getXLen() / 8;
489 
490   EVT VT = Op.getValueType();
491   SDLoc DL(Op);
492   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
493   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
494   while (Depth--) {
495     int Offset = -(XLenInBytes * 2);
496     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
497                               DAG.getIntPtrConstant(Offset, DL));
498     FrameAddr =
499         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
500   }
501   return FrameAddr;
502 }
503 
504 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
505                                              SelectionDAG &DAG) const {
506   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
507   MachineFunction &MF = DAG.getMachineFunction();
508   MachineFrameInfo &MFI = MF.getFrameInfo();
509   MFI.setReturnAddressIsTaken(true);
510   MVT XLenVT = Subtarget.getXLenVT();
511   int XLenInBytes = Subtarget.getXLen() / 8;
512 
513   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
514     return SDValue();
515 
516   EVT VT = Op.getValueType();
517   SDLoc DL(Op);
518   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
519   if (Depth) {
520     int Off = -XLenInBytes;
521     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
522     SDValue Offset = DAG.getConstant(Off, DL, VT);
523     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
524                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
525                        MachinePointerInfo());
526   }
527 
528   // Return the value of the return address register, marking it an implicit
529   // live-in.
530   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
531   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
532 }
533 
534 // Returns the opcode of the target-specific SDNode that implements the 32-bit
535 // form of the given Opcode.
536 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
537   switch (Opcode) {
538   default:
539     llvm_unreachable("Unexpected opcode");
540   case ISD::SHL:
541     return RISCVISD::SLLW;
542   case ISD::SRA:
543     return RISCVISD::SRAW;
544   case ISD::SRL:
545     return RISCVISD::SRLW;
546   case ISD::SDIV:
547     return RISCVISD::DIVW;
548   case ISD::UDIV:
549     return RISCVISD::DIVUW;
550   case ISD::UREM:
551     return RISCVISD::REMUW;
552   }
553 }
554 
555 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
556 // Because i32 isn't a legal type for RV64, these operations would otherwise
557 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
558 // later one because the fact the operation was originally of type i32 is
559 // lost.
560 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
561   SDLoc DL(N);
562   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
563   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
564   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
565   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
566   // ReplaceNodeResults requires we maintain the same type for the return value.
567   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
568 }
569 
570 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
571                                              SmallVectorImpl<SDValue> &Results,
572                                              SelectionDAG &DAG) const {
573   SDLoc DL(N);
574   switch (N->getOpcode()) {
575   default:
576     llvm_unreachable("Don't know how to custom type legalize this operation!");
577   case ISD::SHL:
578   case ISD::SRA:
579   case ISD::SRL:
580     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
581            "Unexpected custom legalisation");
582     if (N->getOperand(1).getOpcode() == ISD::Constant)
583       return;
584     Results.push_back(customLegalizeToWOp(N, DAG));
585     break;
586   case ISD::SDIV:
587   case ISD::UDIV:
588   case ISD::UREM:
589     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
590            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
591     if (N->getOperand(0).getOpcode() == ISD::Constant ||
592         N->getOperand(1).getOpcode() == ISD::Constant)
593       return;
594     Results.push_back(customLegalizeToWOp(N, DAG));
595     break;
596   case ISD::BITCAST: {
597     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
598            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
599     SDLoc DL(N);
600     SDValue Op0 = N->getOperand(0);
601     if (Op0.getValueType() != MVT::f32)
602       return;
603     SDValue FPConv =
604         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
605     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
606     break;
607   }
608   }
609 }
610 
611 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
612                                                DAGCombinerInfo &DCI) const {
613   SelectionDAG &DAG = DCI.DAG;
614 
615   switch (N->getOpcode()) {
616   default:
617     break;
618   case RISCVISD::SplitF64: {
619     SDValue Op0 = N->getOperand(0);
620     // If the input to SplitF64 is just BuildPairF64 then the operation is
621     // redundant. Instead, use BuildPairF64's operands directly.
622     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
623       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
624 
625     SDLoc DL(N);
626     // This is a target-specific version of a DAGCombine performed in
627     // DAGCombiner::visitBITCAST. It performs the equivalent of:
628     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
629     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
630     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
631         !Op0.getNode()->hasOneUse())
632       break;
633     SDValue NewSplitF64 =
634         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
635                     Op0.getOperand(0));
636     SDValue Lo = NewSplitF64.getValue(0);
637     SDValue Hi = NewSplitF64.getValue(1);
638     APInt SignBit = APInt::getSignMask(32);
639     if (Op0.getOpcode() == ISD::FNEG) {
640       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
641                                   DAG.getConstant(SignBit, DL, MVT::i32));
642       return DCI.CombineTo(N, Lo, NewHi);
643     }
644     assert(Op0.getOpcode() == ISD::FABS);
645     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
646                                 DAG.getConstant(~SignBit, DL, MVT::i32));
647     return DCI.CombineTo(N, Lo, NewHi);
648   }
649   case RISCVISD::SLLW:
650   case RISCVISD::SRAW:
651   case RISCVISD::SRLW: {
652     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
653     SDValue LHS = N->getOperand(0);
654     SDValue RHS = N->getOperand(1);
655     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
656     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
657     if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
658         (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
659       return SDValue();
660     break;
661   }
662   case RISCVISD::FMV_X_ANYEXTW_RV64: {
663     SDLoc DL(N);
664     SDValue Op0 = N->getOperand(0);
665     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
666     // conversion is unnecessary and can be replaced with an ANY_EXTEND
667     // of the FMV_W_X_RV64 operand.
668     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
669       SDValue AExtOp =
670           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
671       return DCI.CombineTo(N, AExtOp);
672     }
673 
674     // This is a target-specific version of a DAGCombine performed in
675     // DAGCombiner::visitBITCAST. It performs the equivalent of:
676     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
677     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
678     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
679         !Op0.getNode()->hasOneUse())
680       break;
681     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
682                                  Op0.getOperand(0));
683     APInt SignBit = APInt::getSignMask(32).sext(64);
684     if (Op0.getOpcode() == ISD::FNEG) {
685       return DCI.CombineTo(N,
686                            DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
687                                        DAG.getConstant(SignBit, DL, MVT::i64)));
688     }
689     assert(Op0.getOpcode() == ISD::FABS);
690     return DCI.CombineTo(N,
691                          DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
692                                      DAG.getConstant(~SignBit, DL, MVT::i64)));
693   }
694   }
695 
696   return SDValue();
697 }
698 
699 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
700     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
701     unsigned Depth) const {
702   switch (Op.getOpcode()) {
703   default:
704     break;
705   case RISCVISD::SLLW:
706   case RISCVISD::SRAW:
707   case RISCVISD::SRLW:
708   case RISCVISD::DIVW:
709   case RISCVISD::DIVUW:
710   case RISCVISD::REMUW:
711     // TODO: As the result is sign-extended, this is conservatively correct. A
712     // more precise answer could be calculated for SRAW depending on known
713     // bits in the shift amount.
714     return 33;
715   }
716 
717   return 1;
718 }
719 
720 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
721                                              MachineBasicBlock *BB) {
722   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
723 
724   MachineFunction &MF = *BB->getParent();
725   DebugLoc DL = MI.getDebugLoc();
726   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
727   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
728   unsigned LoReg = MI.getOperand(0).getReg();
729   unsigned HiReg = MI.getOperand(1).getReg();
730   unsigned SrcReg = MI.getOperand(2).getReg();
731   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
732   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
733 
734   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
735                           RI);
736   MachineMemOperand *MMO =
737       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
738                               MachineMemOperand::MOLoad, 8, 8);
739   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
740       .addFrameIndex(FI)
741       .addImm(0)
742       .addMemOperand(MMO);
743   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
744       .addFrameIndex(FI)
745       .addImm(4)
746       .addMemOperand(MMO);
747   MI.eraseFromParent(); // The pseudo instruction is gone now.
748   return BB;
749 }
750 
751 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
752                                                  MachineBasicBlock *BB) {
753   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
754          "Unexpected instruction");
755 
756   MachineFunction &MF = *BB->getParent();
757   DebugLoc DL = MI.getDebugLoc();
758   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
759   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
760   unsigned DstReg = MI.getOperand(0).getReg();
761   unsigned LoReg = MI.getOperand(1).getReg();
762   unsigned HiReg = MI.getOperand(2).getReg();
763   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
764   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
765 
766   MachineMemOperand *MMO =
767       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
768                               MachineMemOperand::MOStore, 8, 8);
769   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
770       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
771       .addFrameIndex(FI)
772       .addImm(0)
773       .addMemOperand(MMO);
774   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
775       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
776       .addFrameIndex(FI)
777       .addImm(4)
778       .addMemOperand(MMO);
779   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
780   MI.eraseFromParent(); // The pseudo instruction is gone now.
781   return BB;
782 }
783 
784 MachineBasicBlock *
785 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
786                                                  MachineBasicBlock *BB) const {
787   switch (MI.getOpcode()) {
788   default:
789     llvm_unreachable("Unexpected instr type to insert");
790   case RISCV::Select_GPR_Using_CC_GPR:
791   case RISCV::Select_FPR32_Using_CC_GPR:
792   case RISCV::Select_FPR64_Using_CC_GPR:
793     break;
794   case RISCV::BuildPairF64Pseudo:
795     return emitBuildPairF64Pseudo(MI, BB);
796   case RISCV::SplitF64Pseudo:
797     return emitSplitF64Pseudo(MI, BB);
798   }
799 
800   // To "insert" a SELECT instruction, we actually have to insert the triangle
801   // control-flow pattern.  The incoming instruction knows the destination vreg
802   // to set, the condition code register to branch on, the true/false values to
803   // select between, and the condcode to use to select the appropriate branch.
804   //
805   // We produce the following control flow:
806   //     HeadMBB
807   //     |  \
808   //     |  IfFalseMBB
809   //     | /
810   //    TailMBB
811   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
812   const BasicBlock *LLVM_BB = BB->getBasicBlock();
813   DebugLoc DL = MI.getDebugLoc();
814   MachineFunction::iterator I = ++BB->getIterator();
815 
816   MachineBasicBlock *HeadMBB = BB;
817   MachineFunction *F = BB->getParent();
818   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
819   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
820 
821   F->insert(I, IfFalseMBB);
822   F->insert(I, TailMBB);
823   // Move all remaining instructions to TailMBB.
824   TailMBB->splice(TailMBB->begin(), HeadMBB,
825                   std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
826   // Update machine-CFG edges by transferring all successors of the current
827   // block to the new block which will contain the Phi node for the select.
828   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
829   // Set the successors for HeadMBB.
830   HeadMBB->addSuccessor(IfFalseMBB);
831   HeadMBB->addSuccessor(TailMBB);
832 
833   // Insert appropriate branch.
834   unsigned LHS = MI.getOperand(1).getReg();
835   unsigned RHS = MI.getOperand(2).getReg();
836   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
837   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
838 
839   BuildMI(HeadMBB, DL, TII.get(Opcode))
840     .addReg(LHS)
841     .addReg(RHS)
842     .addMBB(TailMBB);
843 
844   // IfFalseMBB just falls through to TailMBB.
845   IfFalseMBB->addSuccessor(TailMBB);
846 
847   // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
848   BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
849           MI.getOperand(0).getReg())
850       .addReg(MI.getOperand(4).getReg())
851       .addMBB(HeadMBB)
852       .addReg(MI.getOperand(5).getReg())
853       .addMBB(IfFalseMBB);
854 
855   MI.eraseFromParent(); // The pseudo instruction is gone now.
856   return TailMBB;
857 }
858 
859 // Calling Convention Implementation.
860 // The expectations for frontend ABI lowering vary from target to target.
861 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
862 // details, but this is a longer term goal. For now, we simply try to keep the
863 // role of the frontend as simple and well-defined as possible. The rules can
864 // be summarised as:
865 // * Never split up large scalar arguments. We handle them here.
866 // * If a hardfloat calling convention is being used, and the struct may be
867 // passed in a pair of registers (fp+fp, int+fp), and both registers are
868 // available, then pass as two separate arguments. If either the GPRs or FPRs
869 // are exhausted, then pass according to the rule below.
870 // * If a struct could never be passed in registers or directly in a stack
871 // slot (as it is larger than 2*XLEN and the floating point rules don't
872 // apply), then pass it using a pointer with the byval attribute.
873 // * If a struct is less than 2*XLEN, then coerce to either a two-element
874 // word-sized array or a 2*XLEN scalar (depending on alignment).
875 // * The frontend can determine whether a struct is returned by reference or
876 // not based on its size and fields. If it will be returned by reference, the
877 // frontend must modify the prototype so a pointer with the sret annotation is
878 // passed as the first argument. This is not necessary for large scalar
879 // returns.
880 // * Struct return values and varargs should be coerced to structs containing
881 // register-size fields in the same situations they would be for fixed
882 // arguments.
883 
884 static const MCPhysReg ArgGPRs[] = {
885   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
886   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
887 };
888 
889 // Pass a 2*XLEN argument that has been split into two XLEN values through
890 // registers or the stack as necessary.
891 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
892                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
893                                 MVT ValVT2, MVT LocVT2,
894                                 ISD::ArgFlagsTy ArgFlags2) {
895   unsigned XLenInBytes = XLen / 8;
896   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
897     // At least one half can be passed via register.
898     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
899                                      VA1.getLocVT(), CCValAssign::Full));
900   } else {
901     // Both halves must be passed on the stack, with proper alignment.
902     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
903     State.addLoc(
904         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
905                             State.AllocateStack(XLenInBytes, StackAlign),
906                             VA1.getLocVT(), CCValAssign::Full));
907     State.addLoc(CCValAssign::getMem(
908         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
909         CCValAssign::Full));
910     return false;
911   }
912 
913   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
914     // The second half can also be passed via register.
915     State.addLoc(
916         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
917   } else {
918     // The second half is passed via the stack, without additional alignment.
919     State.addLoc(CCValAssign::getMem(
920         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
921         CCValAssign::Full));
922   }
923 
924   return false;
925 }
926 
927 // Implements the RISC-V calling convention. Returns true upon failure.
928 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
929                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
930                      CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
931   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
932   assert(XLen == 32 || XLen == 64);
933   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
934   if (ValVT == MVT::f32) {
935     LocVT = XLenVT;
936     LocInfo = CCValAssign::BCvt;
937   }
938   if (XLen == 64 && ValVT == MVT::f64) {
939     LocVT = MVT::i64;
940     LocInfo = CCValAssign::BCvt;
941   }
942 
943   // Any return value split in to more than two values can't be returned
944   // directly.
945   if (IsRet && ValNo > 1)
946     return true;
947 
948   // If this is a variadic argument, the RISC-V calling convention requires
949   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
950   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
951   // be used regardless of whether the original argument was split during
952   // legalisation or not. The argument will not be passed by registers if the
953   // original type is larger than 2*XLEN, so the register alignment rule does
954   // not apply.
955   unsigned TwoXLenInBytes = (2 * XLen) / 8;
956   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
957       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
958     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
959     // Skip 'odd' register if necessary.
960     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
961       State.AllocateReg(ArgGPRs);
962   }
963 
964   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
965   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
966       State.getPendingArgFlags();
967 
968   assert(PendingLocs.size() == PendingArgFlags.size() &&
969          "PendingLocs and PendingArgFlags out of sync");
970 
971   // Handle passing f64 on RV32D with a soft float ABI.
972   if (XLen == 32 && ValVT == MVT::f64) {
973     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
974            "Can't lower f64 if it is split");
975     // Depending on available argument GPRS, f64 may be passed in a pair of
976     // GPRs, split between a GPR and the stack, or passed completely on the
977     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
978     // cases.
979     unsigned Reg = State.AllocateReg(ArgGPRs);
980     LocVT = MVT::i32;
981     if (!Reg) {
982       unsigned StackOffset = State.AllocateStack(8, 8);
983       State.addLoc(
984           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
985       return false;
986     }
987     if (!State.AllocateReg(ArgGPRs))
988       State.AllocateStack(4, 4);
989     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
990     return false;
991   }
992 
993   // Split arguments might be passed indirectly, so keep track of the pending
994   // values.
995   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
996     LocVT = XLenVT;
997     LocInfo = CCValAssign::Indirect;
998     PendingLocs.push_back(
999         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1000     PendingArgFlags.push_back(ArgFlags);
1001     if (!ArgFlags.isSplitEnd()) {
1002       return false;
1003     }
1004   }
1005 
1006   // If the split argument only had two elements, it should be passed directly
1007   // in registers or on the stack.
1008   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1009     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1010     // Apply the normal calling convention rules to the first half of the
1011     // split argument.
1012     CCValAssign VA = PendingLocs[0];
1013     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1014     PendingLocs.clear();
1015     PendingArgFlags.clear();
1016     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1017                                ArgFlags);
1018   }
1019 
1020   // Allocate to a register if possible, or else a stack slot.
1021   unsigned Reg = State.AllocateReg(ArgGPRs);
1022   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
1023 
1024   // If we reach this point and PendingLocs is non-empty, we must be at the
1025   // end of a split argument that must be passed indirectly.
1026   if (!PendingLocs.empty()) {
1027     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1028     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1029 
1030     for (auto &It : PendingLocs) {
1031       if (Reg)
1032         It.convertToReg(Reg);
1033       else
1034         It.convertToMem(StackOffset);
1035       State.addLoc(It);
1036     }
1037     PendingLocs.clear();
1038     PendingArgFlags.clear();
1039     return false;
1040   }
1041 
1042   assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
1043 
1044   if (Reg) {
1045     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1046     return false;
1047   }
1048 
1049   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1050   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1051     LocVT = ValVT;
1052     LocInfo = CCValAssign::Full;
1053   }
1054   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1055   return false;
1056 }
1057 
1058 void RISCVTargetLowering::analyzeInputArgs(
1059     MachineFunction &MF, CCState &CCInfo,
1060     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1061   unsigned NumArgs = Ins.size();
1062   FunctionType *FType = MF.getFunction().getFunctionType();
1063 
1064   for (unsigned i = 0; i != NumArgs; ++i) {
1065     MVT ArgVT = Ins[i].VT;
1066     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1067 
1068     Type *ArgTy = nullptr;
1069     if (IsRet)
1070       ArgTy = FType->getReturnType();
1071     else if (Ins[i].isOrigArg())
1072       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1073 
1074     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1075                  ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
1076       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1077                         << EVT(ArgVT).getEVTString() << '\n');
1078       llvm_unreachable(nullptr);
1079     }
1080   }
1081 }
1082 
1083 void RISCVTargetLowering::analyzeOutputArgs(
1084     MachineFunction &MF, CCState &CCInfo,
1085     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1086     CallLoweringInfo *CLI) const {
1087   unsigned NumArgs = Outs.size();
1088 
1089   for (unsigned i = 0; i != NumArgs; i++) {
1090     MVT ArgVT = Outs[i].VT;
1091     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1092     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1093 
1094     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1095                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1096       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1097                         << EVT(ArgVT).getEVTString() << "\n");
1098       llvm_unreachable(nullptr);
1099     }
1100   }
1101 }
1102 
1103 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1104 // values.
1105 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1106                                    const CCValAssign &VA, const SDLoc &DL) {
1107   switch (VA.getLocInfo()) {
1108   default:
1109     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1110   case CCValAssign::Full:
1111     break;
1112   case CCValAssign::BCvt:
1113     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1114       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1115       break;
1116     }
1117     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1118     break;
1119   }
1120   return Val;
1121 }
1122 
1123 // The caller is responsible for loading the full value if the argument is
1124 // passed with CCValAssign::Indirect.
1125 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1126                                 const CCValAssign &VA, const SDLoc &DL) {
1127   MachineFunction &MF = DAG.getMachineFunction();
1128   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1129   EVT LocVT = VA.getLocVT();
1130   SDValue Val;
1131 
1132   unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1133   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1134   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1135 
1136   if (VA.getLocInfo() == CCValAssign::Indirect)
1137     return Val;
1138 
1139   return convertLocVTToValVT(DAG, Val, VA, DL);
1140 }
1141 
1142 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1143                                    const CCValAssign &VA, const SDLoc &DL) {
1144   EVT LocVT = VA.getLocVT();
1145 
1146   switch (VA.getLocInfo()) {
1147   default:
1148     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1149   case CCValAssign::Full:
1150     break;
1151   case CCValAssign::BCvt:
1152     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1153       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1154       break;
1155     }
1156     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1157     break;
1158   }
1159   return Val;
1160 }
1161 
1162 // The caller is responsible for loading the full value if the argument is
1163 // passed with CCValAssign::Indirect.
1164 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1165                                 const CCValAssign &VA, const SDLoc &DL) {
1166   MachineFunction &MF = DAG.getMachineFunction();
1167   MachineFrameInfo &MFI = MF.getFrameInfo();
1168   EVT LocVT = VA.getLocVT();
1169   EVT ValVT = VA.getValVT();
1170   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1171   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1172                                  VA.getLocMemOffset(), /*Immutable=*/true);
1173   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1174   SDValue Val;
1175 
1176   ISD::LoadExtType ExtType;
1177   switch (VA.getLocInfo()) {
1178   default:
1179     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1180   case CCValAssign::Full:
1181   case CCValAssign::Indirect:
1182   case CCValAssign::BCvt:
1183     ExtType = ISD::NON_EXTLOAD;
1184     break;
1185   }
1186   Val = DAG.getExtLoad(
1187       ExtType, DL, LocVT, Chain, FIN,
1188       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1189   return Val;
1190 }
1191 
1192 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1193                                        const CCValAssign &VA, const SDLoc &DL) {
1194   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1195          "Unexpected VA");
1196   MachineFunction &MF = DAG.getMachineFunction();
1197   MachineFrameInfo &MFI = MF.getFrameInfo();
1198   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1199 
1200   if (VA.isMemLoc()) {
1201     // f64 is passed on the stack.
1202     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1203     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1204     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1205                        MachinePointerInfo::getFixedStack(MF, FI));
1206   }
1207 
1208   assert(VA.isRegLoc() && "Expected register VA assignment");
1209 
1210   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1211   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1212   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1213   SDValue Hi;
1214   if (VA.getLocReg() == RISCV::X17) {
1215     // Second half of f64 is passed on the stack.
1216     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1217     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1218     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1219                      MachinePointerInfo::getFixedStack(MF, FI));
1220   } else {
1221     // Second half of f64 is passed in another GPR.
1222     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1223     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1224     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1225   }
1226   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1227 }
1228 
1229 // Transform physical registers into virtual registers.
1230 SDValue RISCVTargetLowering::LowerFormalArguments(
1231     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1232     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1233     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1234 
1235   switch (CallConv) {
1236   default:
1237     report_fatal_error("Unsupported calling convention");
1238   case CallingConv::C:
1239   case CallingConv::Fast:
1240     break;
1241   }
1242 
1243   MachineFunction &MF = DAG.getMachineFunction();
1244 
1245   const Function &Func = MF.getFunction();
1246   if (Func.hasFnAttribute("interrupt")) {
1247     if (!Func.arg_empty())
1248       report_fatal_error(
1249         "Functions with the interrupt attribute cannot have arguments!");
1250 
1251     StringRef Kind =
1252       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1253 
1254     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1255       report_fatal_error(
1256         "Function interrupt attribute argument not supported!");
1257   }
1258 
1259   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1260   MVT XLenVT = Subtarget.getXLenVT();
1261   unsigned XLenInBytes = Subtarget.getXLen() / 8;
1262   // Used with vargs to acumulate store chains.
1263   std::vector<SDValue> OutChains;
1264 
1265   // Assign locations to all of the incoming arguments.
1266   SmallVector<CCValAssign, 16> ArgLocs;
1267   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1268   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1269 
1270   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1271     CCValAssign &VA = ArgLocs[i];
1272     SDValue ArgValue;
1273     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1274     // case.
1275     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1276       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1277     else if (VA.isRegLoc())
1278       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1279     else
1280       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1281 
1282     if (VA.getLocInfo() == CCValAssign::Indirect) {
1283       // If the original argument was split and passed by reference (e.g. i128
1284       // on RV32), we need to load all parts of it here (using the same
1285       // address).
1286       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1287                                    MachinePointerInfo()));
1288       unsigned ArgIndex = Ins[i].OrigArgIndex;
1289       assert(Ins[i].PartOffset == 0);
1290       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1291         CCValAssign &PartVA = ArgLocs[i + 1];
1292         unsigned PartOffset = Ins[i + 1].PartOffset;
1293         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1294                                       DAG.getIntPtrConstant(PartOffset, DL));
1295         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1296                                      MachinePointerInfo()));
1297         ++i;
1298       }
1299       continue;
1300     }
1301     InVals.push_back(ArgValue);
1302   }
1303 
1304   if (IsVarArg) {
1305     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1306     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1307     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1308     MachineFrameInfo &MFI = MF.getFrameInfo();
1309     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1310     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1311 
1312     // Offset of the first variable argument from stack pointer, and size of
1313     // the vararg save area. For now, the varargs save area is either zero or
1314     // large enough to hold a0-a7.
1315     int VaArgOffset, VarArgsSaveSize;
1316 
1317     // If all registers are allocated, then all varargs must be passed on the
1318     // stack and we don't need to save any argregs.
1319     if (ArgRegs.size() == Idx) {
1320       VaArgOffset = CCInfo.getNextStackOffset();
1321       VarArgsSaveSize = 0;
1322     } else {
1323       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1324       VaArgOffset = -VarArgsSaveSize;
1325     }
1326 
1327     // Record the frame index of the first variable argument
1328     // which is a value necessary to VASTART.
1329     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1330     RVFI->setVarArgsFrameIndex(FI);
1331 
1332     // If saving an odd number of registers then create an extra stack slot to
1333     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
1334     // offsets to even-numbered registered remain 2*XLEN-aligned.
1335     if (Idx % 2) {
1336       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1337                                  true);
1338       VarArgsSaveSize += XLenInBytes;
1339     }
1340 
1341     // Copy the integer registers that may have been used for passing varargs
1342     // to the vararg save area.
1343     for (unsigned I = Idx; I < ArgRegs.size();
1344          ++I, VaArgOffset += XLenInBytes) {
1345       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1346       RegInfo.addLiveIn(ArgRegs[I], Reg);
1347       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1348       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1349       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1350       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1351                                    MachinePointerInfo::getFixedStack(MF, FI));
1352       cast<StoreSDNode>(Store.getNode())
1353           ->getMemOperand()
1354           ->setValue((Value *)nullptr);
1355       OutChains.push_back(Store);
1356     }
1357     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1358   }
1359 
1360   // All stores are grouped in one node to allow the matching between
1361   // the size of Ins and InVals. This only happens for vararg functions.
1362   if (!OutChains.empty()) {
1363     OutChains.push_back(Chain);
1364     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1365   }
1366 
1367   return Chain;
1368 }
1369 
1370 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1371 /// for tail call optimization.
1372 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1373 bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
1374   CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1375   const SmallVector<CCValAssign, 16> &ArgLocs) const {
1376 
1377   auto &Callee = CLI.Callee;
1378   auto CalleeCC = CLI.CallConv;
1379   auto IsVarArg = CLI.IsVarArg;
1380   auto &Outs = CLI.Outs;
1381   auto &Caller = MF.getFunction();
1382   auto CallerCC = Caller.getCallingConv();
1383 
1384   // Do not tail call opt functions with "disable-tail-calls" attribute.
1385   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1386     return false;
1387 
1388   // Exception-handling functions need a special set of instructions to
1389   // indicate a return to the hardware. Tail-calling another function would
1390   // probably break this.
1391   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1392   // should be expanded as new function attributes are introduced.
1393   if (Caller.hasFnAttribute("interrupt"))
1394     return false;
1395 
1396   // Do not tail call opt functions with varargs.
1397   if (IsVarArg)
1398     return false;
1399 
1400   // Do not tail call opt if the stack is used to pass parameters.
1401   if (CCInfo.getNextStackOffset() != 0)
1402     return false;
1403 
1404   // Do not tail call opt if any parameters need to be passed indirectly.
1405   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1406   // passed indirectly. So the address of the value will be passed in a
1407   // register, or if not available, then the address is put on the stack. In
1408   // order to pass indirectly, space on the stack often needs to be allocated
1409   // in order to store the value. In this case the CCInfo.getNextStackOffset()
1410   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
1411   // are passed CCValAssign::Indirect.
1412   for (auto &VA : ArgLocs)
1413     if (VA.getLocInfo() == CCValAssign::Indirect)
1414       return false;
1415 
1416   // Do not tail call opt if either caller or callee uses struct return
1417   // semantics.
1418   auto IsCallerStructRet = Caller.hasStructRetAttr();
1419   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1420   if (IsCallerStructRet || IsCalleeStructRet)
1421     return false;
1422 
1423   // Externally-defined functions with weak linkage should not be
1424   // tail-called. The behaviour of branch instructions in this situation (as
1425   // used for tail calls) is implementation-defined, so we cannot rely on the
1426   // linker replacing the tail call with a return.
1427   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1428     const GlobalValue *GV = G->getGlobal();
1429     if (GV->hasExternalWeakLinkage())
1430       return false;
1431   }
1432 
1433   // The callee has to preserve all registers the caller needs to preserve.
1434   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1435   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1436   if (CalleeCC != CallerCC) {
1437     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1438     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1439       return false;
1440   }
1441 
1442   // Byval parameters hand the function a pointer directly into the stack area
1443   // we want to reuse during a tail call. Working around this *is* possible
1444   // but less efficient and uglier in LowerCall.
1445   for (auto &Arg : Outs)
1446     if (Arg.Flags.isByVal())
1447       return false;
1448 
1449   return true;
1450 }
1451 
1452 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
1453 // and output parameter nodes.
1454 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1455                                        SmallVectorImpl<SDValue> &InVals) const {
1456   SelectionDAG &DAG = CLI.DAG;
1457   SDLoc &DL = CLI.DL;
1458   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1459   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1460   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1461   SDValue Chain = CLI.Chain;
1462   SDValue Callee = CLI.Callee;
1463   bool &IsTailCall = CLI.IsTailCall;
1464   CallingConv::ID CallConv = CLI.CallConv;
1465   bool IsVarArg = CLI.IsVarArg;
1466   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1467   MVT XLenVT = Subtarget.getXLenVT();
1468 
1469   MachineFunction &MF = DAG.getMachineFunction();
1470 
1471   // Analyze the operands of the call, assigning locations to each operand.
1472   SmallVector<CCValAssign, 16> ArgLocs;
1473   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1474   analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
1475 
1476   // Check if it's really possible to do a tail call.
1477   if (IsTailCall)
1478     IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
1479                                                    ArgLocs);
1480 
1481   if (IsTailCall)
1482     ++NumTailCalls;
1483   else if (CLI.CS && CLI.CS.isMustTailCall())
1484     report_fatal_error("failed to perform tail call elimination on a call "
1485                        "site marked musttail");
1486 
1487   // Get a count of how many bytes are to be pushed on the stack.
1488   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1489 
1490   // Create local copies for byval args
1491   SmallVector<SDValue, 8> ByValArgs;
1492   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1493     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1494     if (!Flags.isByVal())
1495       continue;
1496 
1497     SDValue Arg = OutVals[i];
1498     unsigned Size = Flags.getByValSize();
1499     unsigned Align = Flags.getByValAlign();
1500 
1501     int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
1502     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1503     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
1504 
1505     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
1506                           /*IsVolatile=*/false,
1507                           /*AlwaysInline=*/false,
1508                           IsTailCall, MachinePointerInfo(),
1509                           MachinePointerInfo());
1510     ByValArgs.push_back(FIPtr);
1511   }
1512 
1513   if (!IsTailCall)
1514     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
1515 
1516   // Copy argument values to their designated locations.
1517   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1518   SmallVector<SDValue, 8> MemOpChains;
1519   SDValue StackPtr;
1520   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
1521     CCValAssign &VA = ArgLocs[i];
1522     SDValue ArgValue = OutVals[i];
1523     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1524 
1525     // Handle passing f64 on RV32D with a soft float ABI as a special case.
1526     bool IsF64OnRV32DSoftABI =
1527         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
1528     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
1529       SDValue SplitF64 = DAG.getNode(
1530           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
1531       SDValue Lo = SplitF64.getValue(0);
1532       SDValue Hi = SplitF64.getValue(1);
1533 
1534       unsigned RegLo = VA.getLocReg();
1535       RegsToPass.push_back(std::make_pair(RegLo, Lo));
1536 
1537       if (RegLo == RISCV::X17) {
1538         // Second half of f64 is passed on the stack.
1539         // Work out the address of the stack slot.
1540         if (!StackPtr.getNode())
1541           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1542         // Emit the store.
1543         MemOpChains.push_back(
1544             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
1545       } else {
1546         // Second half of f64 is passed in another GPR.
1547         unsigned RegHigh = RegLo + 1;
1548         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
1549       }
1550       continue;
1551     }
1552 
1553     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
1554     // as any other MemLoc.
1555 
1556     // Promote the value if needed.
1557     // For now, only handle fully promoted and indirect arguments.
1558     if (VA.getLocInfo() == CCValAssign::Indirect) {
1559       // Store the argument in a stack slot and pass its address.
1560       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
1561       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1562       MemOpChains.push_back(
1563           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1564                        MachinePointerInfo::getFixedStack(MF, FI)));
1565       // If the original argument was split (e.g. i128), we need
1566       // to store all parts of it here (and pass just one address).
1567       unsigned ArgIndex = Outs[i].OrigArgIndex;
1568       assert(Outs[i].PartOffset == 0);
1569       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
1570         SDValue PartValue = OutVals[i + 1];
1571         unsigned PartOffset = Outs[i + 1].PartOffset;
1572         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1573                                       DAG.getIntPtrConstant(PartOffset, DL));
1574         MemOpChains.push_back(
1575             DAG.getStore(Chain, DL, PartValue, Address,
1576                          MachinePointerInfo::getFixedStack(MF, FI)));
1577         ++i;
1578       }
1579       ArgValue = SpillSlot;
1580     } else {
1581       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
1582     }
1583 
1584     // Use local copy if it is a byval arg.
1585     if (Flags.isByVal())
1586       ArgValue = ByValArgs[j++];
1587 
1588     if (VA.isRegLoc()) {
1589       // Queue up the argument copies and emit them at the end.
1590       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1591     } else {
1592       assert(VA.isMemLoc() && "Argument not register or memory");
1593       assert(!IsTailCall && "Tail call not allowed if stack is used "
1594                             "for passing parameters");
1595 
1596       // Work out the address of the stack slot.
1597       if (!StackPtr.getNode())
1598         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1599       SDValue Address =
1600           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1601                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
1602 
1603       // Emit the store.
1604       MemOpChains.push_back(
1605           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1606     }
1607   }
1608 
1609   // Join the stores, which are independent of one another.
1610   if (!MemOpChains.empty())
1611     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1612 
1613   SDValue Glue;
1614 
1615   // Build a sequence of copy-to-reg nodes, chained and glued together.
1616   for (auto &Reg : RegsToPass) {
1617     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
1618     Glue = Chain.getValue(1);
1619   }
1620 
1621   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
1622   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
1623   // split it and then direct call can be matched by PseudoCALL.
1624   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
1625     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
1626   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1627     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
1628   }
1629 
1630   // The first call operand is the chain and the second is the target address.
1631   SmallVector<SDValue, 8> Ops;
1632   Ops.push_back(Chain);
1633   Ops.push_back(Callee);
1634 
1635   // Add argument registers to the end of the list so that they are
1636   // known live into the call.
1637   for (auto &Reg : RegsToPass)
1638     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1639 
1640   if (!IsTailCall) {
1641     // Add a register mask operand representing the call-preserved registers.
1642     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1643     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1644     assert(Mask && "Missing call preserved mask for calling convention");
1645     Ops.push_back(DAG.getRegisterMask(Mask));
1646   }
1647 
1648   // Glue the call to the argument copies, if any.
1649   if (Glue.getNode())
1650     Ops.push_back(Glue);
1651 
1652   // Emit the call.
1653   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1654 
1655   if (IsTailCall) {
1656     MF.getFrameInfo().setHasTailCall();
1657     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
1658   }
1659 
1660   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
1661   Glue = Chain.getValue(1);
1662 
1663   // Mark the end of the call, which is glued to the call itself.
1664   Chain = DAG.getCALLSEQ_END(Chain,
1665                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1666                              DAG.getConstant(0, DL, PtrVT, true),
1667                              Glue, DL);
1668   Glue = Chain.getValue(1);
1669 
1670   // Assign locations to each value returned by this call.
1671   SmallVector<CCValAssign, 16> RVLocs;
1672   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1673   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
1674 
1675   // Copy all of the result registers out of their specified physreg.
1676   for (auto &VA : RVLocs) {
1677     // Copy the value out
1678     SDValue RetValue =
1679         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1680     // Glue the RetValue to the end of the call sequence
1681     Chain = RetValue.getValue(1);
1682     Glue = RetValue.getValue(2);
1683 
1684     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1685       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
1686       SDValue RetValue2 =
1687           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
1688       Chain = RetValue2.getValue(1);
1689       Glue = RetValue2.getValue(2);
1690       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
1691                              RetValue2);
1692     }
1693 
1694     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
1695 
1696     InVals.push_back(RetValue);
1697   }
1698 
1699   return Chain;
1700 }
1701 
1702 bool RISCVTargetLowering::CanLowerReturn(
1703     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1704     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1705   SmallVector<CCValAssign, 16> RVLocs;
1706   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1707   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1708     MVT VT = Outs[i].VT;
1709     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1710     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
1711                  CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
1712       return false;
1713   }
1714   return true;
1715 }
1716 
1717 SDValue
1718 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1719                                  bool IsVarArg,
1720                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
1721                                  const SmallVectorImpl<SDValue> &OutVals,
1722                                  const SDLoc &DL, SelectionDAG &DAG) const {
1723   // Stores the assignment of the return value to a location.
1724   SmallVector<CCValAssign, 16> RVLocs;
1725 
1726   // Info about the registers and stack slot.
1727   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
1728                  *DAG.getContext());
1729 
1730   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
1731                     nullptr);
1732 
1733   SDValue Glue;
1734   SmallVector<SDValue, 4> RetOps(1, Chain);
1735 
1736   // Copy the result values into the output registers.
1737   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
1738     SDValue Val = OutVals[i];
1739     CCValAssign &VA = RVLocs[i];
1740     assert(VA.isRegLoc() && "Can only return in registers!");
1741 
1742     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1743       // Handle returning f64 on RV32D with a soft float ABI.
1744       assert(VA.isRegLoc() && "Expected return via registers");
1745       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
1746                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
1747       SDValue Lo = SplitF64.getValue(0);
1748       SDValue Hi = SplitF64.getValue(1);
1749       unsigned RegLo = VA.getLocReg();
1750       unsigned RegHi = RegLo + 1;
1751       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
1752       Glue = Chain.getValue(1);
1753       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
1754       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
1755       Glue = Chain.getValue(1);
1756       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
1757     } else {
1758       // Handle a 'normal' return.
1759       Val = convertValVTToLocVT(DAG, Val, VA, DL);
1760       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
1761 
1762       // Guarantee that all emitted copies are stuck together.
1763       Glue = Chain.getValue(1);
1764       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1765     }
1766   }
1767 
1768   RetOps[0] = Chain; // Update chain.
1769 
1770   // Add the glue node if we have it.
1771   if (Glue.getNode()) {
1772     RetOps.push_back(Glue);
1773   }
1774 
1775   // Interrupt service routines use different return instructions.
1776   const Function &Func = DAG.getMachineFunction().getFunction();
1777   if (Func.hasFnAttribute("interrupt")) {
1778     if (!Func.getReturnType()->isVoidTy())
1779       report_fatal_error(
1780           "Functions with the interrupt attribute must have void return type!");
1781 
1782     MachineFunction &MF = DAG.getMachineFunction();
1783     StringRef Kind =
1784       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1785 
1786     unsigned RetOpc;
1787     if (Kind == "user")
1788       RetOpc = RISCVISD::URET_FLAG;
1789     else if (Kind == "supervisor")
1790       RetOpc = RISCVISD::SRET_FLAG;
1791     else
1792       RetOpc = RISCVISD::MRET_FLAG;
1793 
1794     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
1795   }
1796 
1797   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
1798 }
1799 
1800 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1801   switch ((RISCVISD::NodeType)Opcode) {
1802   case RISCVISD::FIRST_NUMBER:
1803     break;
1804   case RISCVISD::RET_FLAG:
1805     return "RISCVISD::RET_FLAG";
1806   case RISCVISD::URET_FLAG:
1807     return "RISCVISD::URET_FLAG";
1808   case RISCVISD::SRET_FLAG:
1809     return "RISCVISD::SRET_FLAG";
1810   case RISCVISD::MRET_FLAG:
1811     return "RISCVISD::MRET_FLAG";
1812   case RISCVISD::CALL:
1813     return "RISCVISD::CALL";
1814   case RISCVISD::SELECT_CC:
1815     return "RISCVISD::SELECT_CC";
1816   case RISCVISD::BuildPairF64:
1817     return "RISCVISD::BuildPairF64";
1818   case RISCVISD::SplitF64:
1819     return "RISCVISD::SplitF64";
1820   case RISCVISD::TAIL:
1821     return "RISCVISD::TAIL";
1822   case RISCVISD::SLLW:
1823     return "RISCVISD::SLLW";
1824   case RISCVISD::SRAW:
1825     return "RISCVISD::SRAW";
1826   case RISCVISD::SRLW:
1827     return "RISCVISD::SRLW";
1828   case RISCVISD::DIVW:
1829     return "RISCVISD::DIVW";
1830   case RISCVISD::DIVUW:
1831     return "RISCVISD::DIVUW";
1832   case RISCVISD::REMUW:
1833     return "RISCVISD::REMUW";
1834   case RISCVISD::FMV_W_X_RV64:
1835     return "RISCVISD::FMV_W_X_RV64";
1836   case RISCVISD::FMV_X_ANYEXTW_RV64:
1837     return "RISCVISD::FMV_X_ANYEXTW_RV64";
1838   }
1839   return nullptr;
1840 }
1841 
1842 std::pair<unsigned, const TargetRegisterClass *>
1843 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1844                                                   StringRef Constraint,
1845                                                   MVT VT) const {
1846   // First, see if this is a constraint that directly corresponds to a
1847   // RISCV register class.
1848   if (Constraint.size() == 1) {
1849     switch (Constraint[0]) {
1850     case 'r':
1851       return std::make_pair(0U, &RISCV::GPRRegClass);
1852     default:
1853       break;
1854     }
1855   }
1856 
1857   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1858 }
1859 
1860 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
1861                                                    Instruction *Inst,
1862                                                    AtomicOrdering Ord) const {
1863   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
1864     return Builder.CreateFence(Ord);
1865   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
1866     return Builder.CreateFence(AtomicOrdering::Release);
1867   return nullptr;
1868 }
1869 
1870 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
1871                                                     Instruction *Inst,
1872                                                     AtomicOrdering Ord) const {
1873   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
1874     return Builder.CreateFence(AtomicOrdering::Acquire);
1875   return nullptr;
1876 }
1877 
1878 TargetLowering::AtomicExpansionKind
1879 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1880   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
1881   // point operations can't be used in an lr/sc sequence without breaking the
1882   // forward-progress guarantee.
1883   if (AI->isFloatingPointOperation())
1884     return AtomicExpansionKind::CmpXChg;
1885 
1886   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
1887   if (Size == 8 || Size == 16)
1888     return AtomicExpansionKind::MaskedIntrinsic;
1889   return AtomicExpansionKind::None;
1890 }
1891 
1892 static Intrinsic::ID
1893 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
1894   if (XLen == 32) {
1895     switch (BinOp) {
1896     default:
1897       llvm_unreachable("Unexpected AtomicRMW BinOp");
1898     case AtomicRMWInst::Xchg:
1899       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
1900     case AtomicRMWInst::Add:
1901       return Intrinsic::riscv_masked_atomicrmw_add_i32;
1902     case AtomicRMWInst::Sub:
1903       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
1904     case AtomicRMWInst::Nand:
1905       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
1906     case AtomicRMWInst::Max:
1907       return Intrinsic::riscv_masked_atomicrmw_max_i32;
1908     case AtomicRMWInst::Min:
1909       return Intrinsic::riscv_masked_atomicrmw_min_i32;
1910     case AtomicRMWInst::UMax:
1911       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
1912     case AtomicRMWInst::UMin:
1913       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
1914     }
1915   }
1916 
1917   if (XLen == 64) {
1918     switch (BinOp) {
1919     default:
1920       llvm_unreachable("Unexpected AtomicRMW BinOp");
1921     case AtomicRMWInst::Xchg:
1922       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
1923     case AtomicRMWInst::Add:
1924       return Intrinsic::riscv_masked_atomicrmw_add_i64;
1925     case AtomicRMWInst::Sub:
1926       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
1927     case AtomicRMWInst::Nand:
1928       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
1929     case AtomicRMWInst::Max:
1930       return Intrinsic::riscv_masked_atomicrmw_max_i64;
1931     case AtomicRMWInst::Min:
1932       return Intrinsic::riscv_masked_atomicrmw_min_i64;
1933     case AtomicRMWInst::UMax:
1934       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
1935     case AtomicRMWInst::UMin:
1936       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
1937     }
1938   }
1939 
1940   llvm_unreachable("Unexpected XLen\n");
1941 }
1942 
1943 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
1944     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
1945     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
1946   unsigned XLen = Subtarget.getXLen();
1947   Value *Ordering =
1948       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
1949   Type *Tys[] = {AlignedAddr->getType()};
1950   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
1951       AI->getModule(),
1952       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
1953 
1954   if (XLen == 64) {
1955     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
1956     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
1957     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
1958   }
1959 
1960   Value *Result;
1961 
1962   // Must pass the shift amount needed to sign extend the loaded value prior
1963   // to performing a signed comparison for min/max. ShiftAmt is the number of
1964   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
1965   // is the number of bits to left+right shift the value in order to
1966   // sign-extend.
1967   if (AI->getOperation() == AtomicRMWInst::Min ||
1968       AI->getOperation() == AtomicRMWInst::Max) {
1969     const DataLayout &DL = AI->getModule()->getDataLayout();
1970     unsigned ValWidth =
1971         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
1972     Value *SextShamt =
1973         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
1974     Result = Builder.CreateCall(LrwOpScwLoop,
1975                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
1976   } else {
1977     Result =
1978         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
1979   }
1980 
1981   if (XLen == 64)
1982     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
1983   return Result;
1984 }
1985 
1986 TargetLowering::AtomicExpansionKind
1987 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
1988     AtomicCmpXchgInst *CI) const {
1989   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
1990   if (Size == 8 || Size == 16)
1991     return AtomicExpansionKind::MaskedIntrinsic;
1992   return AtomicExpansionKind::None;
1993 }
1994 
1995 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1996     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1997     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1998   unsigned XLen = Subtarget.getXLen();
1999   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
2000   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
2001   if (XLen == 64) {
2002     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2003     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2004     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2005     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
2006   }
2007   Type *Tys[] = {AlignedAddr->getType()};
2008   Function *MaskedCmpXchg =
2009       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2010   Value *Result = Builder.CreateCall(
2011       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2012   if (XLen == 64)
2013     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2014   return Result;
2015 }
2016