1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 
42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
43                                          const RISCVSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   if (Subtarget.isRV32E())
47     report_fatal_error("Codegen not yet implemented for RV32E");
48 
49   RISCVABI::ABI ABI = Subtarget.getTargetABI();
50   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
51 
52   switch (ABI) {
53   default:
54     report_fatal_error("Don't know how to lower this ABI");
55   case RISCVABI::ABI_ILP32:
56   case RISCVABI::ABI_ILP32F:
57   case RISCVABI::ABI_ILP32D:
58   case RISCVABI::ABI_LP64:
59   case RISCVABI::ABI_LP64F:
60   case RISCVABI::ABI_LP64D:
61     break;
62   }
63 
64   MVT XLenVT = Subtarget.getXLenVT();
65 
66   // Set up the register classes.
67   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
68 
69   if (Subtarget.hasStdExtF())
70     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
71   if (Subtarget.hasStdExtD())
72     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
73 
74   // Compute derived properties from the register classes.
75   computeRegisterProperties(STI.getRegisterInfo());
76 
77   setStackPointerRegisterToSaveRestore(RISCV::X2);
78 
79   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
80     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
81 
82   // TODO: add all necessary setOperationAction calls.
83   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
84 
85   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
86   setOperationAction(ISD::BR_CC, XLenVT, Expand);
87   setOperationAction(ISD::SELECT, XLenVT, Custom);
88   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
89 
90   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
91   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
92 
93   setOperationAction(ISD::VASTART, MVT::Other, Custom);
94   setOperationAction(ISD::VAARG, MVT::Other, Expand);
95   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
96   setOperationAction(ISD::VAEND, MVT::Other, Expand);
97 
98   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
99     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
100 
101   if (Subtarget.is64Bit()) {
102     setOperationAction(ISD::SHL, MVT::i32, Custom);
103     setOperationAction(ISD::SRA, MVT::i32, Custom);
104     setOperationAction(ISD::SRL, MVT::i32, Custom);
105   }
106 
107   if (!Subtarget.hasStdExtM()) {
108     setOperationAction(ISD::MUL, XLenVT, Expand);
109     setOperationAction(ISD::MULHS, XLenVT, Expand);
110     setOperationAction(ISD::MULHU, XLenVT, Expand);
111     setOperationAction(ISD::SDIV, XLenVT, Expand);
112     setOperationAction(ISD::UDIV, XLenVT, Expand);
113     setOperationAction(ISD::SREM, XLenVT, Expand);
114     setOperationAction(ISD::UREM, XLenVT, Expand);
115   }
116 
117   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
118     setOperationAction(ISD::SDIV, MVT::i32, Custom);
119     setOperationAction(ISD::UDIV, MVT::i32, Custom);
120     setOperationAction(ISD::UREM, MVT::i32, Custom);
121   }
122 
123   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
124   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
125   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
126   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
127 
128   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
129   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
130   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
131 
132   setOperationAction(ISD::ROTL, XLenVT, Expand);
133   setOperationAction(ISD::ROTR, XLenVT, Expand);
134   setOperationAction(ISD::BSWAP, XLenVT, Expand);
135   setOperationAction(ISD::CTTZ, XLenVT, Expand);
136   setOperationAction(ISD::CTLZ, XLenVT, Expand);
137   setOperationAction(ISD::CTPOP, XLenVT, Expand);
138 
139   ISD::CondCode FPCCToExtend[] = {
140       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
141       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
142       ISD::SETGE,  ISD::SETNE};
143 
144   ISD::NodeType FPOpToExtend[] = {
145       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
146 
147   if (Subtarget.hasStdExtF()) {
148     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
149     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
150     for (auto CC : FPCCToExtend)
151       setCondCodeAction(CC, MVT::f32, Expand);
152     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
153     setOperationAction(ISD::SELECT, MVT::f32, Custom);
154     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
155     for (auto Op : FPOpToExtend)
156       setOperationAction(Op, MVT::f32, Expand);
157   }
158 
159   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
160     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
161 
162   if (Subtarget.hasStdExtD()) {
163     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
164     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
165     for (auto CC : FPCCToExtend)
166       setCondCodeAction(CC, MVT::f64, Expand);
167     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
168     setOperationAction(ISD::SELECT, MVT::f64, Custom);
169     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
170     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
171     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
172     for (auto Op : FPOpToExtend)
173       setOperationAction(Op, MVT::f64, Expand);
174   }
175 
176   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
177   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
178   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
179 
180   if (Subtarget.hasStdExtA()) {
181     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
182     setMinCmpXchgSizeInBits(32);
183   } else {
184     setMaxAtomicSizeInBitsSupported(0);
185   }
186 
187   setBooleanContents(ZeroOrOneBooleanContent);
188 
189   // Function alignments (log2).
190   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
191   setMinFunctionAlignment(FunctionAlignment);
192   setPrefFunctionAlignment(FunctionAlignment);
193 
194   // Effectively disable jump table generation.
195   setMinimumJumpTableEntries(INT_MAX);
196 }
197 
198 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
199                                             EVT VT) const {
200   if (!VT.isVector())
201     return getPointerTy(DL);
202   return VT.changeVectorElementTypeToInteger();
203 }
204 
205 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
206                                              const CallInst &I,
207                                              MachineFunction &MF,
208                                              unsigned Intrinsic) const {
209   switch (Intrinsic) {
210   default:
211     return false;
212   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
213   case Intrinsic::riscv_masked_atomicrmw_add_i32:
214   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
215   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
216   case Intrinsic::riscv_masked_atomicrmw_max_i32:
217   case Intrinsic::riscv_masked_atomicrmw_min_i32:
218   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
219   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
220   case Intrinsic::riscv_masked_cmpxchg_i32:
221     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
222     Info.opc = ISD::INTRINSIC_W_CHAIN;
223     Info.memVT = MVT::getVT(PtrTy->getElementType());
224     Info.ptrVal = I.getArgOperand(0);
225     Info.offset = 0;
226     Info.align = 4;
227     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
228                  MachineMemOperand::MOVolatile;
229     return true;
230   }
231 }
232 
233 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
234                                                 const AddrMode &AM, Type *Ty,
235                                                 unsigned AS,
236                                                 Instruction *I) const {
237   // No global is ever allowed as a base.
238   if (AM.BaseGV)
239     return false;
240 
241   // Require a 12-bit signed offset.
242   if (!isInt<12>(AM.BaseOffs))
243     return false;
244 
245   switch (AM.Scale) {
246   case 0: // "r+i" or just "i", depending on HasBaseReg.
247     break;
248   case 1:
249     if (!AM.HasBaseReg) // allow "r+i".
250       break;
251     return false; // disallow "r+r" or "r+r+i".
252   default:
253     return false;
254   }
255 
256   return true;
257 }
258 
259 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
260   return isInt<12>(Imm);
261 }
262 
263 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
264   return isInt<12>(Imm);
265 }
266 
267 // On RV32, 64-bit integers are split into their high and low parts and held
268 // in two different registers, so the trunc is free since the low register can
269 // just be used.
270 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
271   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
272     return false;
273   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
274   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
275   return (SrcBits == 64 && DestBits == 32);
276 }
277 
278 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
279   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
280       !SrcVT.isInteger() || !DstVT.isInteger())
281     return false;
282   unsigned SrcBits = SrcVT.getSizeInBits();
283   unsigned DestBits = DstVT.getSizeInBits();
284   return (SrcBits == 64 && DestBits == 32);
285 }
286 
287 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
288   // Zexts are free if they can be combined with a load.
289   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
290     EVT MemVT = LD->getMemoryVT();
291     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
292          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
293         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
294          LD->getExtensionType() == ISD::ZEXTLOAD))
295       return true;
296   }
297 
298   return TargetLowering::isZExtFree(Val, VT2);
299 }
300 
301 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
302   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
303 }
304 
305 // Changes the condition code and swaps operands if necessary, so the SetCC
306 // operation matches one of the comparisons supported directly in the RISC-V
307 // ISA.
308 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
309   switch (CC) {
310   default:
311     break;
312   case ISD::SETGT:
313   case ISD::SETLE:
314   case ISD::SETUGT:
315   case ISD::SETULE:
316     CC = ISD::getSetCCSwappedOperands(CC);
317     std::swap(LHS, RHS);
318     break;
319   }
320 }
321 
322 // Return the RISC-V branch opcode that matches the given DAG integer
323 // condition code. The CondCode must be one of those supported by the RISC-V
324 // ISA (see normaliseSetCC).
325 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
326   switch (CC) {
327   default:
328     llvm_unreachable("Unsupported CondCode");
329   case ISD::SETEQ:
330     return RISCV::BEQ;
331   case ISD::SETNE:
332     return RISCV::BNE;
333   case ISD::SETLT:
334     return RISCV::BLT;
335   case ISD::SETGE:
336     return RISCV::BGE;
337   case ISD::SETULT:
338     return RISCV::BLTU;
339   case ISD::SETUGE:
340     return RISCV::BGEU;
341   }
342 }
343 
344 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
345                                             SelectionDAG &DAG) const {
346   switch (Op.getOpcode()) {
347   default:
348     report_fatal_error("unimplemented operand");
349   case ISD::GlobalAddress:
350     return lowerGlobalAddress(Op, DAG);
351   case ISD::BlockAddress:
352     return lowerBlockAddress(Op, DAG);
353   case ISD::ConstantPool:
354     return lowerConstantPool(Op, DAG);
355   case ISD::SELECT:
356     return lowerSELECT(Op, DAG);
357   case ISD::VASTART:
358     return lowerVASTART(Op, DAG);
359   case ISD::FRAMEADDR:
360     return lowerFRAMEADDR(Op, DAG);
361   case ISD::RETURNADDR:
362     return lowerRETURNADDR(Op, DAG);
363   case ISD::SHL_PARTS:
364     return lowerShiftLeftParts(Op, DAG);
365   case ISD::SRA_PARTS:
366     return lowerShiftRightParts(Op, DAG, true);
367   case ISD::SRL_PARTS:
368     return lowerShiftRightParts(Op, DAG, false);
369   case ISD::BITCAST: {
370     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
371            "Unexpected custom legalisation");
372     SDLoc DL(Op);
373     SDValue Op0 = Op.getOperand(0);
374     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
375       return SDValue();
376     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
377     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
378     return FPConv;
379   }
380   }
381 }
382 
383 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
384                              SelectionDAG &DAG, unsigned Flags) {
385   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
386 }
387 
388 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
389                              SelectionDAG &DAG, unsigned Flags) {
390   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
391                                    Flags);
392 }
393 
394 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
395                              SelectionDAG &DAG, unsigned Flags) {
396   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
397                                    N->getOffset(), Flags);
398 }
399 
400 template <class NodeTy>
401 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
402   SDLoc DL(N);
403   EVT Ty = getPointerTy(DAG.getDataLayout());
404 
405   switch (getTargetMachine().getCodeModel()) {
406   default:
407     report_fatal_error("Unsupported code model for lowering");
408   case CodeModel::Small: {
409     // Generate a sequence for accessing addresses within the first 2 GiB of
410     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
411     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
412     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
413     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
414     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
415   }
416   case CodeModel::Medium: {
417     // Generate a sequence for accessing addresses within any 2GiB range within
418     // the address space. This generates the pattern (PseudoLLA sym), which
419     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
420     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
421     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
422   }
423   }
424 }
425 
426 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
427                                                 SelectionDAG &DAG) const {
428   SDLoc DL(Op);
429   EVT Ty = Op.getValueType();
430   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
431   int64_t Offset = N->getOffset();
432   MVT XLenVT = Subtarget.getXLenVT();
433 
434   if (isPositionIndependent())
435     report_fatal_error("Unable to lowerGlobalAddress");
436 
437   SDValue Addr = getAddr(N, DAG);
438 
439   // In order to maximise the opportunity for common subexpression elimination,
440   // emit a separate ADD node for the global address offset instead of folding
441   // it in the global address node. Later peephole optimisations may choose to
442   // fold it back in when profitable.
443   if (Offset != 0)
444     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
445                        DAG.getConstant(Offset, DL, XLenVT));
446   return Addr;
447 }
448 
449 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
450                                                SelectionDAG &DAG) const {
451   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
452 
453   if (isPositionIndependent())
454     report_fatal_error("Unable to lowerBlockAddress");
455 
456   return getAddr(N, DAG);
457 }
458 
459 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
460                                                SelectionDAG &DAG) const {
461   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
462 
463   if (isPositionIndependent())
464     report_fatal_error("Unable to lowerConstantPool");
465 
466   return getAddr(N, DAG);
467 }
468 
469 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
470   SDValue CondV = Op.getOperand(0);
471   SDValue TrueV = Op.getOperand(1);
472   SDValue FalseV = Op.getOperand(2);
473   SDLoc DL(Op);
474   MVT XLenVT = Subtarget.getXLenVT();
475 
476   // If the result type is XLenVT and CondV is the output of a SETCC node
477   // which also operated on XLenVT inputs, then merge the SETCC node into the
478   // lowered RISCVISD::SELECT_CC to take advantage of the integer
479   // compare+branch instructions. i.e.:
480   // (select (setcc lhs, rhs, cc), truev, falsev)
481   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
482   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
483       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
484     SDValue LHS = CondV.getOperand(0);
485     SDValue RHS = CondV.getOperand(1);
486     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
487     ISD::CondCode CCVal = CC->get();
488 
489     normaliseSetCC(LHS, RHS, CCVal);
490 
491     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
492     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
493     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
494     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
495   }
496 
497   // Otherwise:
498   // (select condv, truev, falsev)
499   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
500   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
501   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
502 
503   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
504   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
505 
506   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
507 }
508 
509 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
510   MachineFunction &MF = DAG.getMachineFunction();
511   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
512 
513   SDLoc DL(Op);
514   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
515                                  getPointerTy(MF.getDataLayout()));
516 
517   // vastart just stores the address of the VarArgsFrameIndex slot into the
518   // memory location argument.
519   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
520   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
521                       MachinePointerInfo(SV));
522 }
523 
524 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
525                                             SelectionDAG &DAG) const {
526   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
527   MachineFunction &MF = DAG.getMachineFunction();
528   MachineFrameInfo &MFI = MF.getFrameInfo();
529   MFI.setFrameAddressIsTaken(true);
530   unsigned FrameReg = RI.getFrameRegister(MF);
531   int XLenInBytes = Subtarget.getXLen() / 8;
532 
533   EVT VT = Op.getValueType();
534   SDLoc DL(Op);
535   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
536   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
537   while (Depth--) {
538     int Offset = -(XLenInBytes * 2);
539     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
540                               DAG.getIntPtrConstant(Offset, DL));
541     FrameAddr =
542         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
543   }
544   return FrameAddr;
545 }
546 
547 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
548                                              SelectionDAG &DAG) const {
549   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
550   MachineFunction &MF = DAG.getMachineFunction();
551   MachineFrameInfo &MFI = MF.getFrameInfo();
552   MFI.setReturnAddressIsTaken(true);
553   MVT XLenVT = Subtarget.getXLenVT();
554   int XLenInBytes = Subtarget.getXLen() / 8;
555 
556   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
557     return SDValue();
558 
559   EVT VT = Op.getValueType();
560   SDLoc DL(Op);
561   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
562   if (Depth) {
563     int Off = -XLenInBytes;
564     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
565     SDValue Offset = DAG.getConstant(Off, DL, VT);
566     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
567                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
568                        MachinePointerInfo());
569   }
570 
571   // Return the value of the return address register, marking it an implicit
572   // live-in.
573   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
574   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
575 }
576 
577 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
578                                                  SelectionDAG &DAG) const {
579   SDLoc DL(Op);
580   SDValue Lo = Op.getOperand(0);
581   SDValue Hi = Op.getOperand(1);
582   SDValue Shamt = Op.getOperand(2);
583   EVT VT = Lo.getValueType();
584 
585   // if Shamt-XLEN < 0: // Shamt < XLEN
586   //   Lo = Lo << Shamt
587   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
588   // else:
589   //   Lo = 0
590   //   Hi = Lo << (Shamt-XLEN)
591 
592   SDValue Zero = DAG.getConstant(0, DL, VT);
593   SDValue One = DAG.getConstant(1, DL, VT);
594   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
595   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
596   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
597   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
598 
599   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
600   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
601   SDValue ShiftRightLo =
602       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
603   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
604   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
605   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
606 
607   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
608 
609   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
610   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
611 
612   SDValue Parts[2] = {Lo, Hi};
613   return DAG.getMergeValues(Parts, DL);
614 }
615 
616 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
617                                                   bool IsSRA) const {
618   SDLoc DL(Op);
619   SDValue Lo = Op.getOperand(0);
620   SDValue Hi = Op.getOperand(1);
621   SDValue Shamt = Op.getOperand(2);
622   EVT VT = Lo.getValueType();
623 
624   // SRA expansion:
625   //   if Shamt-XLEN < 0: // Shamt < XLEN
626   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
627   //     Hi = Hi >>s Shamt
628   //   else:
629   //     Lo = Hi >>s (Shamt-XLEN);
630   //     Hi = Hi >>s (XLEN-1)
631   //
632   // SRL expansion:
633   //   if Shamt-XLEN < 0: // Shamt < XLEN
634   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
635   //     Hi = Hi >>u Shamt
636   //   else:
637   //     Lo = Hi >>u (Shamt-XLEN);
638   //     Hi = 0;
639 
640   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
641 
642   SDValue Zero = DAG.getConstant(0, DL, VT);
643   SDValue One = DAG.getConstant(1, DL, VT);
644   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
645   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
646   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
647   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
648 
649   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
650   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
651   SDValue ShiftLeftHi =
652       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
653   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
654   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
655   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
656   SDValue HiFalse =
657       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
658 
659   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
660 
661   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
662   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
663 
664   SDValue Parts[2] = {Lo, Hi};
665   return DAG.getMergeValues(Parts, DL);
666 }
667 
668 // Returns the opcode of the target-specific SDNode that implements the 32-bit
669 // form of the given Opcode.
670 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
671   switch (Opcode) {
672   default:
673     llvm_unreachable("Unexpected opcode");
674   case ISD::SHL:
675     return RISCVISD::SLLW;
676   case ISD::SRA:
677     return RISCVISD::SRAW;
678   case ISD::SRL:
679     return RISCVISD::SRLW;
680   case ISD::SDIV:
681     return RISCVISD::DIVW;
682   case ISD::UDIV:
683     return RISCVISD::DIVUW;
684   case ISD::UREM:
685     return RISCVISD::REMUW;
686   }
687 }
688 
689 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
690 // Because i32 isn't a legal type for RV64, these operations would otherwise
691 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
692 // later one because the fact the operation was originally of type i32 is
693 // lost.
694 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
695   SDLoc DL(N);
696   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
697   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
698   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
699   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
700   // ReplaceNodeResults requires we maintain the same type for the return value.
701   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
702 }
703 
704 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
705                                              SmallVectorImpl<SDValue> &Results,
706                                              SelectionDAG &DAG) const {
707   SDLoc DL(N);
708   switch (N->getOpcode()) {
709   default:
710     llvm_unreachable("Don't know how to custom type legalize this operation!");
711   case ISD::SHL:
712   case ISD::SRA:
713   case ISD::SRL:
714     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
715            "Unexpected custom legalisation");
716     if (N->getOperand(1).getOpcode() == ISD::Constant)
717       return;
718     Results.push_back(customLegalizeToWOp(N, DAG));
719     break;
720   case ISD::SDIV:
721   case ISD::UDIV:
722   case ISD::UREM:
723     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
724            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
725     if (N->getOperand(0).getOpcode() == ISD::Constant ||
726         N->getOperand(1).getOpcode() == ISD::Constant)
727       return;
728     Results.push_back(customLegalizeToWOp(N, DAG));
729     break;
730   case ISD::BITCAST: {
731     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
732            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
733     SDLoc DL(N);
734     SDValue Op0 = N->getOperand(0);
735     if (Op0.getValueType() != MVT::f32)
736       return;
737     SDValue FPConv =
738         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
739     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
740     break;
741   }
742   }
743 }
744 
745 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
746                                                DAGCombinerInfo &DCI) const {
747   SelectionDAG &DAG = DCI.DAG;
748 
749   switch (N->getOpcode()) {
750   default:
751     break;
752   case RISCVISD::SplitF64: {
753     SDValue Op0 = N->getOperand(0);
754     // If the input to SplitF64 is just BuildPairF64 then the operation is
755     // redundant. Instead, use BuildPairF64's operands directly.
756     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
757       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
758 
759     SDLoc DL(N);
760 
761     // It's cheaper to materialise two 32-bit integers than to load a double
762     // from the constant pool and transfer it to integer registers through the
763     // stack.
764     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
765       APInt V = C->getValueAPF().bitcastToAPInt();
766       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
767       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
768       return DCI.CombineTo(N, Lo, Hi);
769     }
770 
771     // This is a target-specific version of a DAGCombine performed in
772     // DAGCombiner::visitBITCAST. It performs the equivalent of:
773     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
774     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
775     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
776         !Op0.getNode()->hasOneUse())
777       break;
778     SDValue NewSplitF64 =
779         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
780                     Op0.getOperand(0));
781     SDValue Lo = NewSplitF64.getValue(0);
782     SDValue Hi = NewSplitF64.getValue(1);
783     APInt SignBit = APInt::getSignMask(32);
784     if (Op0.getOpcode() == ISD::FNEG) {
785       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
786                                   DAG.getConstant(SignBit, DL, MVT::i32));
787       return DCI.CombineTo(N, Lo, NewHi);
788     }
789     assert(Op0.getOpcode() == ISD::FABS);
790     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
791                                 DAG.getConstant(~SignBit, DL, MVT::i32));
792     return DCI.CombineTo(N, Lo, NewHi);
793   }
794   case RISCVISD::SLLW:
795   case RISCVISD::SRAW:
796   case RISCVISD::SRLW: {
797     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
798     SDValue LHS = N->getOperand(0);
799     SDValue RHS = N->getOperand(1);
800     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
801     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
802     if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
803         (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
804       return SDValue();
805     break;
806   }
807   case RISCVISD::FMV_X_ANYEXTW_RV64: {
808     SDLoc DL(N);
809     SDValue Op0 = N->getOperand(0);
810     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
811     // conversion is unnecessary and can be replaced with an ANY_EXTEND
812     // of the FMV_W_X_RV64 operand.
813     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
814       SDValue AExtOp =
815           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
816       return DCI.CombineTo(N, AExtOp);
817     }
818 
819     // This is a target-specific version of a DAGCombine performed in
820     // DAGCombiner::visitBITCAST. It performs the equivalent of:
821     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
822     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
823     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
824         !Op0.getNode()->hasOneUse())
825       break;
826     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
827                                  Op0.getOperand(0));
828     APInt SignBit = APInt::getSignMask(32).sext(64);
829     if (Op0.getOpcode() == ISD::FNEG) {
830       return DCI.CombineTo(N,
831                            DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
832                                        DAG.getConstant(SignBit, DL, MVT::i64)));
833     }
834     assert(Op0.getOpcode() == ISD::FABS);
835     return DCI.CombineTo(N,
836                          DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
837                                      DAG.getConstant(~SignBit, DL, MVT::i64)));
838   }
839   }
840 
841   return SDValue();
842 }
843 
844 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
845     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
846     unsigned Depth) const {
847   switch (Op.getOpcode()) {
848   default:
849     break;
850   case RISCVISD::SLLW:
851   case RISCVISD::SRAW:
852   case RISCVISD::SRLW:
853   case RISCVISD::DIVW:
854   case RISCVISD::DIVUW:
855   case RISCVISD::REMUW:
856     // TODO: As the result is sign-extended, this is conservatively correct. A
857     // more precise answer could be calculated for SRAW depending on known
858     // bits in the shift amount.
859     return 33;
860   }
861 
862   return 1;
863 }
864 
865 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
866                                              MachineBasicBlock *BB) {
867   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
868 
869   MachineFunction &MF = *BB->getParent();
870   DebugLoc DL = MI.getDebugLoc();
871   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
872   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
873   unsigned LoReg = MI.getOperand(0).getReg();
874   unsigned HiReg = MI.getOperand(1).getReg();
875   unsigned SrcReg = MI.getOperand(2).getReg();
876   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
877   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
878 
879   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
880                           RI);
881   MachineMemOperand *MMO =
882       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
883                               MachineMemOperand::MOLoad, 8, 8);
884   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
885       .addFrameIndex(FI)
886       .addImm(0)
887       .addMemOperand(MMO);
888   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
889       .addFrameIndex(FI)
890       .addImm(4)
891       .addMemOperand(MMO);
892   MI.eraseFromParent(); // The pseudo instruction is gone now.
893   return BB;
894 }
895 
896 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
897                                                  MachineBasicBlock *BB) {
898   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
899          "Unexpected instruction");
900 
901   MachineFunction &MF = *BB->getParent();
902   DebugLoc DL = MI.getDebugLoc();
903   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
904   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
905   unsigned DstReg = MI.getOperand(0).getReg();
906   unsigned LoReg = MI.getOperand(1).getReg();
907   unsigned HiReg = MI.getOperand(2).getReg();
908   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
909   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
910 
911   MachineMemOperand *MMO =
912       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
913                               MachineMemOperand::MOStore, 8, 8);
914   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
915       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
916       .addFrameIndex(FI)
917       .addImm(0)
918       .addMemOperand(MMO);
919   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
920       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
921       .addFrameIndex(FI)
922       .addImm(4)
923       .addMemOperand(MMO);
924   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
925   MI.eraseFromParent(); // The pseudo instruction is gone now.
926   return BB;
927 }
928 
929 static bool isSelectPseudo(MachineInstr &MI) {
930   switch (MI.getOpcode()) {
931   default:
932     return false;
933   case RISCV::Select_GPR_Using_CC_GPR:
934   case RISCV::Select_FPR32_Using_CC_GPR:
935   case RISCV::Select_FPR64_Using_CC_GPR:
936     return true;
937   }
938 }
939 
940 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
941                                            MachineBasicBlock *BB) {
942   // To "insert" Select_* instructions, we actually have to insert the triangle
943   // control-flow pattern.  The incoming instructions know the destination vreg
944   // to set, the condition code register to branch on, the true/false values to
945   // select between, and the condcode to use to select the appropriate branch.
946   //
947   // We produce the following control flow:
948   //     HeadMBB
949   //     |  \
950   //     |  IfFalseMBB
951   //     | /
952   //    TailMBB
953   //
954   // When we find a sequence of selects we attempt to optimize their emission
955   // by sharing the control flow. Currently we only handle cases where we have
956   // multiple selects with the exact same condition (same LHS, RHS and CC).
957   // The selects may be interleaved with other instructions if the other
958   // instructions meet some requirements we deem safe:
959   // - They are debug instructions. Otherwise,
960   // - They do not have side-effects, do not access memory and their inputs do
961   //   not depend on the results of the select pseudo-instructions.
962   // The TrueV/FalseV operands of the selects cannot depend on the result of
963   // previous selects in the sequence.
964   // These conditions could be further relaxed. See the X86 target for a
965   // related approach and more information.
966   unsigned LHS = MI.getOperand(1).getReg();
967   unsigned RHS = MI.getOperand(2).getReg();
968   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
969 
970   SmallVector<MachineInstr *, 4> SelectDebugValues;
971   SmallSet<unsigned, 4> SelectDests;
972   SelectDests.insert(MI.getOperand(0).getReg());
973 
974   MachineInstr *LastSelectPseudo = &MI;
975 
976   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
977        SequenceMBBI != E; ++SequenceMBBI) {
978     if (SequenceMBBI->isDebugInstr())
979       continue;
980     else if (isSelectPseudo(*SequenceMBBI)) {
981       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
982           SequenceMBBI->getOperand(2).getReg() != RHS ||
983           SequenceMBBI->getOperand(3).getImm() != CC ||
984           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
985           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
986         break;
987       LastSelectPseudo = &*SequenceMBBI;
988       SequenceMBBI->collectDebugValues(SelectDebugValues);
989       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
990     } else {
991       if (SequenceMBBI->hasUnmodeledSideEffects() ||
992           SequenceMBBI->mayLoadOrStore())
993         break;
994       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
995             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
996           }))
997         break;
998     }
999   }
1000 
1001   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1002   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1003   DebugLoc DL = MI.getDebugLoc();
1004   MachineFunction::iterator I = ++BB->getIterator();
1005 
1006   MachineBasicBlock *HeadMBB = BB;
1007   MachineFunction *F = BB->getParent();
1008   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
1009   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
1010 
1011   F->insert(I, IfFalseMBB);
1012   F->insert(I, TailMBB);
1013 
1014   // Transfer debug instructions associated with the selects to TailMBB.
1015   for (MachineInstr *DebugInstr : SelectDebugValues) {
1016     TailMBB->push_back(DebugInstr->removeFromParent());
1017   }
1018 
1019   // Move all instructions after the sequence to TailMBB.
1020   TailMBB->splice(TailMBB->end(), HeadMBB,
1021                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
1022   // Update machine-CFG edges by transferring all successors of the current
1023   // block to the new block which will contain the Phi nodes for the selects.
1024   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
1025   // Set the successors for HeadMBB.
1026   HeadMBB->addSuccessor(IfFalseMBB);
1027   HeadMBB->addSuccessor(TailMBB);
1028 
1029   // Insert appropriate branch.
1030   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
1031 
1032   BuildMI(HeadMBB, DL, TII.get(Opcode))
1033     .addReg(LHS)
1034     .addReg(RHS)
1035     .addMBB(TailMBB);
1036 
1037   // IfFalseMBB just falls through to TailMBB.
1038   IfFalseMBB->addSuccessor(TailMBB);
1039 
1040   // Create PHIs for all of the select pseudo-instructions.
1041   auto SelectMBBI = MI.getIterator();
1042   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
1043   auto InsertionPoint = TailMBB->begin();
1044   while (SelectMBBI != SelectEnd) {
1045     auto Next = std::next(SelectMBBI);
1046     if (isSelectPseudo(*SelectMBBI)) {
1047       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
1048       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
1049               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1050           .addReg(SelectMBBI->getOperand(4).getReg())
1051           .addMBB(HeadMBB)
1052           .addReg(SelectMBBI->getOperand(5).getReg())
1053           .addMBB(IfFalseMBB);
1054       SelectMBBI->eraseFromParent();
1055     }
1056     SelectMBBI = Next;
1057   }
1058 
1059   return TailMBB;
1060 }
1061 
1062 MachineBasicBlock *
1063 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1064                                                  MachineBasicBlock *BB) const {
1065   switch (MI.getOpcode()) {
1066   default:
1067     llvm_unreachable("Unexpected instr type to insert");
1068   case RISCV::Select_GPR_Using_CC_GPR:
1069   case RISCV::Select_FPR32_Using_CC_GPR:
1070   case RISCV::Select_FPR64_Using_CC_GPR:
1071     return emitSelectPseudo(MI, BB);
1072   case RISCV::BuildPairF64Pseudo:
1073     return emitBuildPairF64Pseudo(MI, BB);
1074   case RISCV::SplitF64Pseudo:
1075     return emitSplitF64Pseudo(MI, BB);
1076   }
1077 }
1078 
1079 // Calling Convention Implementation.
1080 // The expectations for frontend ABI lowering vary from target to target.
1081 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
1082 // details, but this is a longer term goal. For now, we simply try to keep the
1083 // role of the frontend as simple and well-defined as possible. The rules can
1084 // be summarised as:
1085 // * Never split up large scalar arguments. We handle them here.
1086 // * If a hardfloat calling convention is being used, and the struct may be
1087 // passed in a pair of registers (fp+fp, int+fp), and both registers are
1088 // available, then pass as two separate arguments. If either the GPRs or FPRs
1089 // are exhausted, then pass according to the rule below.
1090 // * If a struct could never be passed in registers or directly in a stack
1091 // slot (as it is larger than 2*XLEN and the floating point rules don't
1092 // apply), then pass it using a pointer with the byval attribute.
1093 // * If a struct is less than 2*XLEN, then coerce to either a two-element
1094 // word-sized array or a 2*XLEN scalar (depending on alignment).
1095 // * The frontend can determine whether a struct is returned by reference or
1096 // not based on its size and fields. If it will be returned by reference, the
1097 // frontend must modify the prototype so a pointer with the sret annotation is
1098 // passed as the first argument. This is not necessary for large scalar
1099 // returns.
1100 // * Struct return values and varargs should be coerced to structs containing
1101 // register-size fields in the same situations they would be for fixed
1102 // arguments.
1103 
1104 static const MCPhysReg ArgGPRs[] = {
1105   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
1106   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
1107 };
1108 static const MCPhysReg ArgFPR32s[] = {
1109   RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
1110   RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32
1111 };
1112 static const MCPhysReg ArgFPR64s[] = {
1113   RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
1114   RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64
1115 };
1116 
1117 // Pass a 2*XLEN argument that has been split into two XLEN values through
1118 // registers or the stack as necessary.
1119 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
1120                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
1121                                 MVT ValVT2, MVT LocVT2,
1122                                 ISD::ArgFlagsTy ArgFlags2) {
1123   unsigned XLenInBytes = XLen / 8;
1124   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
1125     // At least one half can be passed via register.
1126     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1127                                      VA1.getLocVT(), CCValAssign::Full));
1128   } else {
1129     // Both halves must be passed on the stack, with proper alignment.
1130     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
1131     State.addLoc(
1132         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1133                             State.AllocateStack(XLenInBytes, StackAlign),
1134                             VA1.getLocVT(), CCValAssign::Full));
1135     State.addLoc(CCValAssign::getMem(
1136         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1137         CCValAssign::Full));
1138     return false;
1139   }
1140 
1141   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
1142     // The second half can also be passed via register.
1143     State.addLoc(
1144         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1145   } else {
1146     // The second half is passed via the stack, without additional alignment.
1147     State.addLoc(CCValAssign::getMem(
1148         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1149         CCValAssign::Full));
1150   }
1151 
1152   return false;
1153 }
1154 
1155 // Implements the RISC-V calling convention. Returns true upon failure.
1156 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1157                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1158                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1159                      bool IsRet, Type *OrigTy) {
1160   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
1161   assert(XLen == 32 || XLen == 64);
1162   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
1163 
1164   // Any return value split in to more than two values can't be returned
1165   // directly.
1166   if (IsRet && ValNo > 1)
1167     return true;
1168 
1169   // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
1170   // variadic argument, or if no F32 argument registers are available.
1171   bool UseGPRForF32 = true;
1172   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
1173   // variadic argument, or if no F64 argument registers are available.
1174   bool UseGPRForF64 = true;
1175 
1176   switch (ABI) {
1177   default:
1178     llvm_unreachable("Unexpected ABI");
1179   case RISCVABI::ABI_ILP32:
1180   case RISCVABI::ABI_LP64:
1181     break;
1182   case RISCVABI::ABI_ILP32F:
1183   case RISCVABI::ABI_LP64F:
1184     UseGPRForF32 = !IsFixed;
1185     break;
1186   case RISCVABI::ABI_ILP32D:
1187   case RISCVABI::ABI_LP64D:
1188     UseGPRForF32 = !IsFixed;
1189     UseGPRForF64 = !IsFixed;
1190     break;
1191   }
1192 
1193   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
1194     UseGPRForF32 = true;
1195   if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
1196     UseGPRForF64 = true;
1197 
1198   // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
1199   // variables rather than directly checking against the target ABI.
1200 
1201   if (UseGPRForF32 && ValVT == MVT::f32) {
1202     LocVT = XLenVT;
1203     LocInfo = CCValAssign::BCvt;
1204   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
1205     LocVT = MVT::i64;
1206     LocInfo = CCValAssign::BCvt;
1207   }
1208 
1209   // If this is a variadic argument, the RISC-V calling convention requires
1210   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
1211   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
1212   // be used regardless of whether the original argument was split during
1213   // legalisation or not. The argument will not be passed by registers if the
1214   // original type is larger than 2*XLEN, so the register alignment rule does
1215   // not apply.
1216   unsigned TwoXLenInBytes = (2 * XLen) / 8;
1217   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
1218       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1219     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1220     // Skip 'odd' register if necessary.
1221     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
1222       State.AllocateReg(ArgGPRs);
1223   }
1224 
1225   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1226   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1227       State.getPendingArgFlags();
1228 
1229   assert(PendingLocs.size() == PendingArgFlags.size() &&
1230          "PendingLocs and PendingArgFlags out of sync");
1231 
1232   // Handle passing f64 on RV32D with a soft float ABI or when floating point
1233   // registers are exhausted.
1234   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1235     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1236            "Can't lower f64 if it is split");
1237     // Depending on available argument GPRS, f64 may be passed in a pair of
1238     // GPRs, split between a GPR and the stack, or passed completely on the
1239     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
1240     // cases.
1241     unsigned Reg = State.AllocateReg(ArgGPRs);
1242     LocVT = MVT::i32;
1243     if (!Reg) {
1244       unsigned StackOffset = State.AllocateStack(8, 8);
1245       State.addLoc(
1246           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1247       return false;
1248     }
1249     if (!State.AllocateReg(ArgGPRs))
1250       State.AllocateStack(4, 4);
1251     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1252     return false;
1253   }
1254 
1255   // Split arguments might be passed indirectly, so keep track of the pending
1256   // values.
1257   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
1258     LocVT = XLenVT;
1259     LocInfo = CCValAssign::Indirect;
1260     PendingLocs.push_back(
1261         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1262     PendingArgFlags.push_back(ArgFlags);
1263     if (!ArgFlags.isSplitEnd()) {
1264       return false;
1265     }
1266   }
1267 
1268   // If the split argument only had two elements, it should be passed directly
1269   // in registers or on the stack.
1270   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1271     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1272     // Apply the normal calling convention rules to the first half of the
1273     // split argument.
1274     CCValAssign VA = PendingLocs[0];
1275     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1276     PendingLocs.clear();
1277     PendingArgFlags.clear();
1278     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1279                                ArgFlags);
1280   }
1281 
1282   // Allocate to a register if possible, or else a stack slot.
1283   unsigned Reg;
1284   if (ValVT == MVT::f32 && !UseGPRForF32)
1285     Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
1286   else if (ValVT == MVT::f64 && !UseGPRForF64)
1287     Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
1288   else
1289     Reg = State.AllocateReg(ArgGPRs);
1290   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
1291 
1292   // If we reach this point and PendingLocs is non-empty, we must be at the
1293   // end of a split argument that must be passed indirectly.
1294   if (!PendingLocs.empty()) {
1295     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1296     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1297 
1298     for (auto &It : PendingLocs) {
1299       if (Reg)
1300         It.convertToReg(Reg);
1301       else
1302         It.convertToMem(StackOffset);
1303       State.addLoc(It);
1304     }
1305     PendingLocs.clear();
1306     PendingArgFlags.clear();
1307     return false;
1308   }
1309 
1310   assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
1311          "Expected an XLenVT at this stage");
1312 
1313   if (Reg) {
1314     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1315     return false;
1316   }
1317 
1318   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1319   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1320     LocVT = ValVT;
1321     LocInfo = CCValAssign::Full;
1322   }
1323   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1324   return false;
1325 }
1326 
1327 void RISCVTargetLowering::analyzeInputArgs(
1328     MachineFunction &MF, CCState &CCInfo,
1329     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1330   unsigned NumArgs = Ins.size();
1331   FunctionType *FType = MF.getFunction().getFunctionType();
1332 
1333   for (unsigned i = 0; i != NumArgs; ++i) {
1334     MVT ArgVT = Ins[i].VT;
1335     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1336 
1337     Type *ArgTy = nullptr;
1338     if (IsRet)
1339       ArgTy = FType->getReturnType();
1340     else if (Ins[i].isOrigArg())
1341       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1342 
1343     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1344     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1345                  ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
1346       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1347                         << EVT(ArgVT).getEVTString() << '\n');
1348       llvm_unreachable(nullptr);
1349     }
1350   }
1351 }
1352 
1353 void RISCVTargetLowering::analyzeOutputArgs(
1354     MachineFunction &MF, CCState &CCInfo,
1355     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1356     CallLoweringInfo *CLI) const {
1357   unsigned NumArgs = Outs.size();
1358 
1359   for (unsigned i = 0; i != NumArgs; i++) {
1360     MVT ArgVT = Outs[i].VT;
1361     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1362     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1363 
1364     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1365     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1366                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1367       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1368                         << EVT(ArgVT).getEVTString() << "\n");
1369       llvm_unreachable(nullptr);
1370     }
1371   }
1372 }
1373 
1374 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1375 // values.
1376 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1377                                    const CCValAssign &VA, const SDLoc &DL) {
1378   switch (VA.getLocInfo()) {
1379   default:
1380     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1381   case CCValAssign::Full:
1382     break;
1383   case CCValAssign::BCvt:
1384     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1385       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1386       break;
1387     }
1388     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1389     break;
1390   }
1391   return Val;
1392 }
1393 
1394 // The caller is responsible for loading the full value if the argument is
1395 // passed with CCValAssign::Indirect.
1396 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1397                                 const CCValAssign &VA, const SDLoc &DL) {
1398   MachineFunction &MF = DAG.getMachineFunction();
1399   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1400   EVT LocVT = VA.getLocVT();
1401   SDValue Val;
1402   const TargetRegisterClass *RC;
1403 
1404   switch (LocVT.getSimpleVT().SimpleTy) {
1405   default:
1406     llvm_unreachable("Unexpected register type");
1407   case MVT::i32:
1408   case MVT::i64:
1409     RC = &RISCV::GPRRegClass;
1410     break;
1411   case MVT::f32:
1412     RC = &RISCV::FPR32RegClass;
1413     break;
1414   case MVT::f64:
1415     RC = &RISCV::FPR64RegClass;
1416     break;
1417   }
1418 
1419   unsigned VReg = RegInfo.createVirtualRegister(RC);
1420   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1421   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1422 
1423   if (VA.getLocInfo() == CCValAssign::Indirect)
1424     return Val;
1425 
1426   return convertLocVTToValVT(DAG, Val, VA, DL);
1427 }
1428 
1429 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1430                                    const CCValAssign &VA, const SDLoc &DL) {
1431   EVT LocVT = VA.getLocVT();
1432 
1433   switch (VA.getLocInfo()) {
1434   default:
1435     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1436   case CCValAssign::Full:
1437     break;
1438   case CCValAssign::BCvt:
1439     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1440       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1441       break;
1442     }
1443     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1444     break;
1445   }
1446   return Val;
1447 }
1448 
1449 // The caller is responsible for loading the full value if the argument is
1450 // passed with CCValAssign::Indirect.
1451 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1452                                 const CCValAssign &VA, const SDLoc &DL) {
1453   MachineFunction &MF = DAG.getMachineFunction();
1454   MachineFrameInfo &MFI = MF.getFrameInfo();
1455   EVT LocVT = VA.getLocVT();
1456   EVT ValVT = VA.getValVT();
1457   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1458   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1459                                  VA.getLocMemOffset(), /*Immutable=*/true);
1460   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1461   SDValue Val;
1462 
1463   ISD::LoadExtType ExtType;
1464   switch (VA.getLocInfo()) {
1465   default:
1466     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1467   case CCValAssign::Full:
1468   case CCValAssign::Indirect:
1469   case CCValAssign::BCvt:
1470     ExtType = ISD::NON_EXTLOAD;
1471     break;
1472   }
1473   Val = DAG.getExtLoad(
1474       ExtType, DL, LocVT, Chain, FIN,
1475       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1476   return Val;
1477 }
1478 
1479 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1480                                        const CCValAssign &VA, const SDLoc &DL) {
1481   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1482          "Unexpected VA");
1483   MachineFunction &MF = DAG.getMachineFunction();
1484   MachineFrameInfo &MFI = MF.getFrameInfo();
1485   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1486 
1487   if (VA.isMemLoc()) {
1488     // f64 is passed on the stack.
1489     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1490     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1491     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1492                        MachinePointerInfo::getFixedStack(MF, FI));
1493   }
1494 
1495   assert(VA.isRegLoc() && "Expected register VA assignment");
1496 
1497   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1498   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1499   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1500   SDValue Hi;
1501   if (VA.getLocReg() == RISCV::X17) {
1502     // Second half of f64 is passed on the stack.
1503     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1504     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1505     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1506                      MachinePointerInfo::getFixedStack(MF, FI));
1507   } else {
1508     // Second half of f64 is passed in another GPR.
1509     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1510     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1511     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1512   }
1513   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1514 }
1515 
1516 // Transform physical registers into virtual registers.
1517 SDValue RISCVTargetLowering::LowerFormalArguments(
1518     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1519     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1520     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1521 
1522   switch (CallConv) {
1523   default:
1524     report_fatal_error("Unsupported calling convention");
1525   case CallingConv::C:
1526   case CallingConv::Fast:
1527     break;
1528   }
1529 
1530   MachineFunction &MF = DAG.getMachineFunction();
1531 
1532   const Function &Func = MF.getFunction();
1533   if (Func.hasFnAttribute("interrupt")) {
1534     if (!Func.arg_empty())
1535       report_fatal_error(
1536         "Functions with the interrupt attribute cannot have arguments!");
1537 
1538     StringRef Kind =
1539       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1540 
1541     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1542       report_fatal_error(
1543         "Function interrupt attribute argument not supported!");
1544   }
1545 
1546   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1547   MVT XLenVT = Subtarget.getXLenVT();
1548   unsigned XLenInBytes = Subtarget.getXLen() / 8;
1549   // Used with vargs to acumulate store chains.
1550   std::vector<SDValue> OutChains;
1551 
1552   // Assign locations to all of the incoming arguments.
1553   SmallVector<CCValAssign, 16> ArgLocs;
1554   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1555   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1556 
1557   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1558     CCValAssign &VA = ArgLocs[i];
1559     SDValue ArgValue;
1560     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1561     // case.
1562     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1563       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1564     else if (VA.isRegLoc())
1565       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1566     else
1567       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1568 
1569     if (VA.getLocInfo() == CCValAssign::Indirect) {
1570       // If the original argument was split and passed by reference (e.g. i128
1571       // on RV32), we need to load all parts of it here (using the same
1572       // address).
1573       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1574                                    MachinePointerInfo()));
1575       unsigned ArgIndex = Ins[i].OrigArgIndex;
1576       assert(Ins[i].PartOffset == 0);
1577       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1578         CCValAssign &PartVA = ArgLocs[i + 1];
1579         unsigned PartOffset = Ins[i + 1].PartOffset;
1580         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1581                                       DAG.getIntPtrConstant(PartOffset, DL));
1582         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1583                                      MachinePointerInfo()));
1584         ++i;
1585       }
1586       continue;
1587     }
1588     InVals.push_back(ArgValue);
1589   }
1590 
1591   if (IsVarArg) {
1592     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1593     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1594     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1595     MachineFrameInfo &MFI = MF.getFrameInfo();
1596     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1597     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1598 
1599     // Offset of the first variable argument from stack pointer, and size of
1600     // the vararg save area. For now, the varargs save area is either zero or
1601     // large enough to hold a0-a7.
1602     int VaArgOffset, VarArgsSaveSize;
1603 
1604     // If all registers are allocated, then all varargs must be passed on the
1605     // stack and we don't need to save any argregs.
1606     if (ArgRegs.size() == Idx) {
1607       VaArgOffset = CCInfo.getNextStackOffset();
1608       VarArgsSaveSize = 0;
1609     } else {
1610       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1611       VaArgOffset = -VarArgsSaveSize;
1612     }
1613 
1614     // Record the frame index of the first variable argument
1615     // which is a value necessary to VASTART.
1616     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1617     RVFI->setVarArgsFrameIndex(FI);
1618 
1619     // If saving an odd number of registers then create an extra stack slot to
1620     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
1621     // offsets to even-numbered registered remain 2*XLEN-aligned.
1622     if (Idx % 2) {
1623       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1624                                  true);
1625       VarArgsSaveSize += XLenInBytes;
1626     }
1627 
1628     // Copy the integer registers that may have been used for passing varargs
1629     // to the vararg save area.
1630     for (unsigned I = Idx; I < ArgRegs.size();
1631          ++I, VaArgOffset += XLenInBytes) {
1632       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1633       RegInfo.addLiveIn(ArgRegs[I], Reg);
1634       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1635       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1636       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1637       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1638                                    MachinePointerInfo::getFixedStack(MF, FI));
1639       cast<StoreSDNode>(Store.getNode())
1640           ->getMemOperand()
1641           ->setValue((Value *)nullptr);
1642       OutChains.push_back(Store);
1643     }
1644     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1645   }
1646 
1647   // All stores are grouped in one node to allow the matching between
1648   // the size of Ins and InVals. This only happens for vararg functions.
1649   if (!OutChains.empty()) {
1650     OutChains.push_back(Chain);
1651     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1652   }
1653 
1654   return Chain;
1655 }
1656 
1657 /// isEligibleForTailCallOptimization - Check whether the call is eligible
1658 /// for tail call optimization.
1659 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1660 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
1661     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1662     const SmallVector<CCValAssign, 16> &ArgLocs) const {
1663 
1664   auto &Callee = CLI.Callee;
1665   auto CalleeCC = CLI.CallConv;
1666   auto IsVarArg = CLI.IsVarArg;
1667   auto &Outs = CLI.Outs;
1668   auto &Caller = MF.getFunction();
1669   auto CallerCC = Caller.getCallingConv();
1670 
1671   // Do not tail call opt functions with "disable-tail-calls" attribute.
1672   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1673     return false;
1674 
1675   // Exception-handling functions need a special set of instructions to
1676   // indicate a return to the hardware. Tail-calling another function would
1677   // probably break this.
1678   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1679   // should be expanded as new function attributes are introduced.
1680   if (Caller.hasFnAttribute("interrupt"))
1681     return false;
1682 
1683   // Do not tail call opt functions with varargs.
1684   if (IsVarArg)
1685     return false;
1686 
1687   // Do not tail call opt if the stack is used to pass parameters.
1688   if (CCInfo.getNextStackOffset() != 0)
1689     return false;
1690 
1691   // Do not tail call opt if any parameters need to be passed indirectly.
1692   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1693   // passed indirectly. So the address of the value will be passed in a
1694   // register, or if not available, then the address is put on the stack. In
1695   // order to pass indirectly, space on the stack often needs to be allocated
1696   // in order to store the value. In this case the CCInfo.getNextStackOffset()
1697   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
1698   // are passed CCValAssign::Indirect.
1699   for (auto &VA : ArgLocs)
1700     if (VA.getLocInfo() == CCValAssign::Indirect)
1701       return false;
1702 
1703   // Do not tail call opt if either caller or callee uses struct return
1704   // semantics.
1705   auto IsCallerStructRet = Caller.hasStructRetAttr();
1706   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1707   if (IsCallerStructRet || IsCalleeStructRet)
1708     return false;
1709 
1710   // Externally-defined functions with weak linkage should not be
1711   // tail-called. The behaviour of branch instructions in this situation (as
1712   // used for tail calls) is implementation-defined, so we cannot rely on the
1713   // linker replacing the tail call with a return.
1714   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1715     const GlobalValue *GV = G->getGlobal();
1716     if (GV->hasExternalWeakLinkage())
1717       return false;
1718   }
1719 
1720   // The callee has to preserve all registers the caller needs to preserve.
1721   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1722   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1723   if (CalleeCC != CallerCC) {
1724     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1725     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1726       return false;
1727   }
1728 
1729   // Byval parameters hand the function a pointer directly into the stack area
1730   // we want to reuse during a tail call. Working around this *is* possible
1731   // but less efficient and uglier in LowerCall.
1732   for (auto &Arg : Outs)
1733     if (Arg.Flags.isByVal())
1734       return false;
1735 
1736   return true;
1737 }
1738 
1739 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
1740 // and output parameter nodes.
1741 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1742                                        SmallVectorImpl<SDValue> &InVals) const {
1743   SelectionDAG &DAG = CLI.DAG;
1744   SDLoc &DL = CLI.DL;
1745   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1746   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1747   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1748   SDValue Chain = CLI.Chain;
1749   SDValue Callee = CLI.Callee;
1750   bool &IsTailCall = CLI.IsTailCall;
1751   CallingConv::ID CallConv = CLI.CallConv;
1752   bool IsVarArg = CLI.IsVarArg;
1753   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1754   MVT XLenVT = Subtarget.getXLenVT();
1755 
1756   MachineFunction &MF = DAG.getMachineFunction();
1757 
1758   // Analyze the operands of the call, assigning locations to each operand.
1759   SmallVector<CCValAssign, 16> ArgLocs;
1760   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1761   analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
1762 
1763   // Check if it's really possible to do a tail call.
1764   if (IsTailCall)
1765     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
1766 
1767   if (IsTailCall)
1768     ++NumTailCalls;
1769   else if (CLI.CS && CLI.CS.isMustTailCall())
1770     report_fatal_error("failed to perform tail call elimination on a call "
1771                        "site marked musttail");
1772 
1773   // Get a count of how many bytes are to be pushed on the stack.
1774   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1775 
1776   // Create local copies for byval args
1777   SmallVector<SDValue, 8> ByValArgs;
1778   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1779     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1780     if (!Flags.isByVal())
1781       continue;
1782 
1783     SDValue Arg = OutVals[i];
1784     unsigned Size = Flags.getByValSize();
1785     unsigned Align = Flags.getByValAlign();
1786 
1787     int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
1788     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1789     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
1790 
1791     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
1792                           /*IsVolatile=*/false,
1793                           /*AlwaysInline=*/false,
1794                           IsTailCall, MachinePointerInfo(),
1795                           MachinePointerInfo());
1796     ByValArgs.push_back(FIPtr);
1797   }
1798 
1799   if (!IsTailCall)
1800     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
1801 
1802   // Copy argument values to their designated locations.
1803   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1804   SmallVector<SDValue, 8> MemOpChains;
1805   SDValue StackPtr;
1806   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
1807     CCValAssign &VA = ArgLocs[i];
1808     SDValue ArgValue = OutVals[i];
1809     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1810 
1811     // Handle passing f64 on RV32D with a soft float ABI as a special case.
1812     bool IsF64OnRV32DSoftABI =
1813         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
1814     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
1815       SDValue SplitF64 = DAG.getNode(
1816           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
1817       SDValue Lo = SplitF64.getValue(0);
1818       SDValue Hi = SplitF64.getValue(1);
1819 
1820       unsigned RegLo = VA.getLocReg();
1821       RegsToPass.push_back(std::make_pair(RegLo, Lo));
1822 
1823       if (RegLo == RISCV::X17) {
1824         // Second half of f64 is passed on the stack.
1825         // Work out the address of the stack slot.
1826         if (!StackPtr.getNode())
1827           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1828         // Emit the store.
1829         MemOpChains.push_back(
1830             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
1831       } else {
1832         // Second half of f64 is passed in another GPR.
1833         unsigned RegHigh = RegLo + 1;
1834         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
1835       }
1836       continue;
1837     }
1838 
1839     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
1840     // as any other MemLoc.
1841 
1842     // Promote the value if needed.
1843     // For now, only handle fully promoted and indirect arguments.
1844     if (VA.getLocInfo() == CCValAssign::Indirect) {
1845       // Store the argument in a stack slot and pass its address.
1846       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
1847       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1848       MemOpChains.push_back(
1849           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1850                        MachinePointerInfo::getFixedStack(MF, FI)));
1851       // If the original argument was split (e.g. i128), we need
1852       // to store all parts of it here (and pass just one address).
1853       unsigned ArgIndex = Outs[i].OrigArgIndex;
1854       assert(Outs[i].PartOffset == 0);
1855       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
1856         SDValue PartValue = OutVals[i + 1];
1857         unsigned PartOffset = Outs[i + 1].PartOffset;
1858         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1859                                       DAG.getIntPtrConstant(PartOffset, DL));
1860         MemOpChains.push_back(
1861             DAG.getStore(Chain, DL, PartValue, Address,
1862                          MachinePointerInfo::getFixedStack(MF, FI)));
1863         ++i;
1864       }
1865       ArgValue = SpillSlot;
1866     } else {
1867       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
1868     }
1869 
1870     // Use local copy if it is a byval arg.
1871     if (Flags.isByVal())
1872       ArgValue = ByValArgs[j++];
1873 
1874     if (VA.isRegLoc()) {
1875       // Queue up the argument copies and emit them at the end.
1876       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1877     } else {
1878       assert(VA.isMemLoc() && "Argument not register or memory");
1879       assert(!IsTailCall && "Tail call not allowed if stack is used "
1880                             "for passing parameters");
1881 
1882       // Work out the address of the stack slot.
1883       if (!StackPtr.getNode())
1884         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1885       SDValue Address =
1886           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1887                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
1888 
1889       // Emit the store.
1890       MemOpChains.push_back(
1891           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1892     }
1893   }
1894 
1895   // Join the stores, which are independent of one another.
1896   if (!MemOpChains.empty())
1897     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1898 
1899   SDValue Glue;
1900 
1901   // Build a sequence of copy-to-reg nodes, chained and glued together.
1902   for (auto &Reg : RegsToPass) {
1903     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
1904     Glue = Chain.getValue(1);
1905   }
1906 
1907   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
1908   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
1909   // split it and then direct call can be matched by PseudoCALL.
1910   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
1911     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0,
1912                                         RISCVII::MO_CALL);
1913   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1914     Callee =
1915         DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
1916   }
1917 
1918   // The first call operand is the chain and the second is the target address.
1919   SmallVector<SDValue, 8> Ops;
1920   Ops.push_back(Chain);
1921   Ops.push_back(Callee);
1922 
1923   // Add argument registers to the end of the list so that they are
1924   // known live into the call.
1925   for (auto &Reg : RegsToPass)
1926     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1927 
1928   if (!IsTailCall) {
1929     // Add a register mask operand representing the call-preserved registers.
1930     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1931     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1932     assert(Mask && "Missing call preserved mask for calling convention");
1933     Ops.push_back(DAG.getRegisterMask(Mask));
1934   }
1935 
1936   // Glue the call to the argument copies, if any.
1937   if (Glue.getNode())
1938     Ops.push_back(Glue);
1939 
1940   // Emit the call.
1941   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1942 
1943   if (IsTailCall) {
1944     MF.getFrameInfo().setHasTailCall();
1945     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
1946   }
1947 
1948   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
1949   Glue = Chain.getValue(1);
1950 
1951   // Mark the end of the call, which is glued to the call itself.
1952   Chain = DAG.getCALLSEQ_END(Chain,
1953                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1954                              DAG.getConstant(0, DL, PtrVT, true),
1955                              Glue, DL);
1956   Glue = Chain.getValue(1);
1957 
1958   // Assign locations to each value returned by this call.
1959   SmallVector<CCValAssign, 16> RVLocs;
1960   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1961   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
1962 
1963   // Copy all of the result registers out of their specified physreg.
1964   for (auto &VA : RVLocs) {
1965     // Copy the value out
1966     SDValue RetValue =
1967         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1968     // Glue the RetValue to the end of the call sequence
1969     Chain = RetValue.getValue(1);
1970     Glue = RetValue.getValue(2);
1971 
1972     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1973       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
1974       SDValue RetValue2 =
1975           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
1976       Chain = RetValue2.getValue(1);
1977       Glue = RetValue2.getValue(2);
1978       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
1979                              RetValue2);
1980     }
1981 
1982     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
1983 
1984     InVals.push_back(RetValue);
1985   }
1986 
1987   return Chain;
1988 }
1989 
1990 bool RISCVTargetLowering::CanLowerReturn(
1991     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1992     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1993   SmallVector<CCValAssign, 16> RVLocs;
1994   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1995   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1996     MVT VT = Outs[i].VT;
1997     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1998     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1999     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
2000                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
2001       return false;
2002   }
2003   return true;
2004 }
2005 
2006 SDValue
2007 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2008                                  bool IsVarArg,
2009                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2010                                  const SmallVectorImpl<SDValue> &OutVals,
2011                                  const SDLoc &DL, SelectionDAG &DAG) const {
2012   // Stores the assignment of the return value to a location.
2013   SmallVector<CCValAssign, 16> RVLocs;
2014 
2015   // Info about the registers and stack slot.
2016   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2017                  *DAG.getContext());
2018 
2019   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2020                     nullptr);
2021 
2022   SDValue Glue;
2023   SmallVector<SDValue, 4> RetOps(1, Chain);
2024 
2025   // Copy the result values into the output registers.
2026   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2027     SDValue Val = OutVals[i];
2028     CCValAssign &VA = RVLocs[i];
2029     assert(VA.isRegLoc() && "Can only return in registers!");
2030 
2031     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2032       // Handle returning f64 on RV32D with a soft float ABI.
2033       assert(VA.isRegLoc() && "Expected return via registers");
2034       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
2035                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
2036       SDValue Lo = SplitF64.getValue(0);
2037       SDValue Hi = SplitF64.getValue(1);
2038       unsigned RegLo = VA.getLocReg();
2039       unsigned RegHi = RegLo + 1;
2040       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
2041       Glue = Chain.getValue(1);
2042       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
2043       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
2044       Glue = Chain.getValue(1);
2045       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
2046     } else {
2047       // Handle a 'normal' return.
2048       Val = convertValVTToLocVT(DAG, Val, VA, DL);
2049       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2050 
2051       // Guarantee that all emitted copies are stuck together.
2052       Glue = Chain.getValue(1);
2053       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2054     }
2055   }
2056 
2057   RetOps[0] = Chain; // Update chain.
2058 
2059   // Add the glue node if we have it.
2060   if (Glue.getNode()) {
2061     RetOps.push_back(Glue);
2062   }
2063 
2064   // Interrupt service routines use different return instructions.
2065   const Function &Func = DAG.getMachineFunction().getFunction();
2066   if (Func.hasFnAttribute("interrupt")) {
2067     if (!Func.getReturnType()->isVoidTy())
2068       report_fatal_error(
2069           "Functions with the interrupt attribute must have void return type!");
2070 
2071     MachineFunction &MF = DAG.getMachineFunction();
2072     StringRef Kind =
2073       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2074 
2075     unsigned RetOpc;
2076     if (Kind == "user")
2077       RetOpc = RISCVISD::URET_FLAG;
2078     else if (Kind == "supervisor")
2079       RetOpc = RISCVISD::SRET_FLAG;
2080     else
2081       RetOpc = RISCVISD::MRET_FLAG;
2082 
2083     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
2084   }
2085 
2086   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
2087 }
2088 
2089 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
2090   switch ((RISCVISD::NodeType)Opcode) {
2091   case RISCVISD::FIRST_NUMBER:
2092     break;
2093   case RISCVISD::RET_FLAG:
2094     return "RISCVISD::RET_FLAG";
2095   case RISCVISD::URET_FLAG:
2096     return "RISCVISD::URET_FLAG";
2097   case RISCVISD::SRET_FLAG:
2098     return "RISCVISD::SRET_FLAG";
2099   case RISCVISD::MRET_FLAG:
2100     return "RISCVISD::MRET_FLAG";
2101   case RISCVISD::CALL:
2102     return "RISCVISD::CALL";
2103   case RISCVISD::SELECT_CC:
2104     return "RISCVISD::SELECT_CC";
2105   case RISCVISD::BuildPairF64:
2106     return "RISCVISD::BuildPairF64";
2107   case RISCVISD::SplitF64:
2108     return "RISCVISD::SplitF64";
2109   case RISCVISD::TAIL:
2110     return "RISCVISD::TAIL";
2111   case RISCVISD::SLLW:
2112     return "RISCVISD::SLLW";
2113   case RISCVISD::SRAW:
2114     return "RISCVISD::SRAW";
2115   case RISCVISD::SRLW:
2116     return "RISCVISD::SRLW";
2117   case RISCVISD::DIVW:
2118     return "RISCVISD::DIVW";
2119   case RISCVISD::DIVUW:
2120     return "RISCVISD::DIVUW";
2121   case RISCVISD::REMUW:
2122     return "RISCVISD::REMUW";
2123   case RISCVISD::FMV_W_X_RV64:
2124     return "RISCVISD::FMV_W_X_RV64";
2125   case RISCVISD::FMV_X_ANYEXTW_RV64:
2126     return "RISCVISD::FMV_X_ANYEXTW_RV64";
2127   }
2128   return nullptr;
2129 }
2130 
2131 std::pair<unsigned, const TargetRegisterClass *>
2132 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2133                                                   StringRef Constraint,
2134                                                   MVT VT) const {
2135   // First, see if this is a constraint that directly corresponds to a
2136   // RISCV register class.
2137   if (Constraint.size() == 1) {
2138     switch (Constraint[0]) {
2139     case 'r':
2140       return std::make_pair(0U, &RISCV::GPRRegClass);
2141     default:
2142       break;
2143     }
2144   }
2145 
2146   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2147 }
2148 
2149 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
2150                                                    Instruction *Inst,
2151                                                    AtomicOrdering Ord) const {
2152   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
2153     return Builder.CreateFence(Ord);
2154   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
2155     return Builder.CreateFence(AtomicOrdering::Release);
2156   return nullptr;
2157 }
2158 
2159 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
2160                                                     Instruction *Inst,
2161                                                     AtomicOrdering Ord) const {
2162   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
2163     return Builder.CreateFence(AtomicOrdering::Acquire);
2164   return nullptr;
2165 }
2166 
2167 TargetLowering::AtomicExpansionKind
2168 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2169   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
2170   // point operations can't be used in an lr/sc sequence without breaking the
2171   // forward-progress guarantee.
2172   if (AI->isFloatingPointOperation())
2173     return AtomicExpansionKind::CmpXChg;
2174 
2175   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2176   if (Size == 8 || Size == 16)
2177     return AtomicExpansionKind::MaskedIntrinsic;
2178   return AtomicExpansionKind::None;
2179 }
2180 
2181 static Intrinsic::ID
2182 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
2183   if (XLen == 32) {
2184     switch (BinOp) {
2185     default:
2186       llvm_unreachable("Unexpected AtomicRMW BinOp");
2187     case AtomicRMWInst::Xchg:
2188       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
2189     case AtomicRMWInst::Add:
2190       return Intrinsic::riscv_masked_atomicrmw_add_i32;
2191     case AtomicRMWInst::Sub:
2192       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
2193     case AtomicRMWInst::Nand:
2194       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
2195     case AtomicRMWInst::Max:
2196       return Intrinsic::riscv_masked_atomicrmw_max_i32;
2197     case AtomicRMWInst::Min:
2198       return Intrinsic::riscv_masked_atomicrmw_min_i32;
2199     case AtomicRMWInst::UMax:
2200       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
2201     case AtomicRMWInst::UMin:
2202       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
2203     }
2204   }
2205 
2206   if (XLen == 64) {
2207     switch (BinOp) {
2208     default:
2209       llvm_unreachable("Unexpected AtomicRMW BinOp");
2210     case AtomicRMWInst::Xchg:
2211       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
2212     case AtomicRMWInst::Add:
2213       return Intrinsic::riscv_masked_atomicrmw_add_i64;
2214     case AtomicRMWInst::Sub:
2215       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
2216     case AtomicRMWInst::Nand:
2217       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
2218     case AtomicRMWInst::Max:
2219       return Intrinsic::riscv_masked_atomicrmw_max_i64;
2220     case AtomicRMWInst::Min:
2221       return Intrinsic::riscv_masked_atomicrmw_min_i64;
2222     case AtomicRMWInst::UMax:
2223       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
2224     case AtomicRMWInst::UMin:
2225       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
2226     }
2227   }
2228 
2229   llvm_unreachable("Unexpected XLen\n");
2230 }
2231 
2232 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
2233     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2234     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2235   unsigned XLen = Subtarget.getXLen();
2236   Value *Ordering =
2237       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
2238   Type *Tys[] = {AlignedAddr->getType()};
2239   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
2240       AI->getModule(),
2241       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
2242 
2243   if (XLen == 64) {
2244     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2245     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2246     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2247   }
2248 
2249   Value *Result;
2250 
2251   // Must pass the shift amount needed to sign extend the loaded value prior
2252   // to performing a signed comparison for min/max. ShiftAmt is the number of
2253   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
2254   // is the number of bits to left+right shift the value in order to
2255   // sign-extend.
2256   if (AI->getOperation() == AtomicRMWInst::Min ||
2257       AI->getOperation() == AtomicRMWInst::Max) {
2258     const DataLayout &DL = AI->getModule()->getDataLayout();
2259     unsigned ValWidth =
2260         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2261     Value *SextShamt =
2262         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
2263     Result = Builder.CreateCall(LrwOpScwLoop,
2264                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2265   } else {
2266     Result =
2267         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2268   }
2269 
2270   if (XLen == 64)
2271     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2272   return Result;
2273 }
2274 
2275 TargetLowering::AtomicExpansionKind
2276 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
2277     AtomicCmpXchgInst *CI) const {
2278   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2279   if (Size == 8 || Size == 16)
2280     return AtomicExpansionKind::MaskedIntrinsic;
2281   return AtomicExpansionKind::None;
2282 }
2283 
2284 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2285     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2286     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2287   unsigned XLen = Subtarget.getXLen();
2288   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
2289   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
2290   if (XLen == 64) {
2291     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2292     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2293     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2294     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
2295   }
2296   Type *Tys[] = {AlignedAddr->getType()};
2297   Function *MaskedCmpXchg =
2298       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2299   Value *Result = Builder.CreateCall(
2300       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2301   if (XLen == 64)
2302     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2303   return Result;
2304 }
2305