1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 
42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
43                                          const RISCVSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   if (Subtarget.isRV32E())
47     report_fatal_error("Codegen not yet implemented for RV32E");
48 
49   RISCVABI::ABI ABI = Subtarget.getTargetABI();
50   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
51 
52   switch (ABI) {
53   default:
54     report_fatal_error("Don't know how to lower this ABI");
55   case RISCVABI::ABI_ILP32:
56   case RISCVABI::ABI_ILP32F:
57   case RISCVABI::ABI_ILP32D:
58   case RISCVABI::ABI_LP64:
59   case RISCVABI::ABI_LP64F:
60   case RISCVABI::ABI_LP64D:
61     break;
62   }
63 
64   MVT XLenVT = Subtarget.getXLenVT();
65 
66   // Set up the register classes.
67   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
68 
69   if (Subtarget.hasStdExtF())
70     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
71   if (Subtarget.hasStdExtD())
72     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
73 
74   // Compute derived properties from the register classes.
75   computeRegisterProperties(STI.getRegisterInfo());
76 
77   setStackPointerRegisterToSaveRestore(RISCV::X2);
78 
79   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
80     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
81 
82   // TODO: add all necessary setOperationAction calls.
83   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
84 
85   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
86   setOperationAction(ISD::BR_CC, XLenVT, Expand);
87   setOperationAction(ISD::SELECT, XLenVT, Custom);
88   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
89 
90   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
91   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
92 
93   setOperationAction(ISD::VASTART, MVT::Other, Custom);
94   setOperationAction(ISD::VAARG, MVT::Other, Expand);
95   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
96   setOperationAction(ISD::VAEND, MVT::Other, Expand);
97 
98   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
99     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
100 
101   if (Subtarget.is64Bit()) {
102     setOperationAction(ISD::SHL, MVT::i32, Custom);
103     setOperationAction(ISD::SRA, MVT::i32, Custom);
104     setOperationAction(ISD::SRL, MVT::i32, Custom);
105   }
106 
107   if (!Subtarget.hasStdExtM()) {
108     setOperationAction(ISD::MUL, XLenVT, Expand);
109     setOperationAction(ISD::MULHS, XLenVT, Expand);
110     setOperationAction(ISD::MULHU, XLenVT, Expand);
111     setOperationAction(ISD::SDIV, XLenVT, Expand);
112     setOperationAction(ISD::UDIV, XLenVT, Expand);
113     setOperationAction(ISD::SREM, XLenVT, Expand);
114     setOperationAction(ISD::UREM, XLenVT, Expand);
115   }
116 
117   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
118     setOperationAction(ISD::SDIV, MVT::i32, Custom);
119     setOperationAction(ISD::UDIV, MVT::i32, Custom);
120     setOperationAction(ISD::UREM, MVT::i32, Custom);
121   }
122 
123   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
124   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
125   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
126   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
127 
128   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
129   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
130   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
131 
132   setOperationAction(ISD::ROTL, XLenVT, Expand);
133   setOperationAction(ISD::ROTR, XLenVT, Expand);
134   setOperationAction(ISD::BSWAP, XLenVT, Expand);
135   setOperationAction(ISD::CTTZ, XLenVT, Expand);
136   setOperationAction(ISD::CTLZ, XLenVT, Expand);
137   setOperationAction(ISD::CTPOP, XLenVT, Expand);
138 
139   ISD::CondCode FPCCToExtend[] = {
140       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
141       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
142       ISD::SETGE,  ISD::SETNE};
143 
144   ISD::NodeType FPOpToExtend[] = {
145       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
146 
147   if (Subtarget.hasStdExtF()) {
148     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
149     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
150     for (auto CC : FPCCToExtend)
151       setCondCodeAction(CC, MVT::f32, Expand);
152     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
153     setOperationAction(ISD::SELECT, MVT::f32, Custom);
154     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
155     for (auto Op : FPOpToExtend)
156       setOperationAction(Op, MVT::f32, Expand);
157   }
158 
159   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
160     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
161 
162   if (Subtarget.hasStdExtD()) {
163     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
164     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
165     for (auto CC : FPCCToExtend)
166       setCondCodeAction(CC, MVT::f64, Expand);
167     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
168     setOperationAction(ISD::SELECT, MVT::f64, Custom);
169     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
170     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
171     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
172     for (auto Op : FPOpToExtend)
173       setOperationAction(Op, MVT::f64, Expand);
174   }
175 
176   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
177   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
178   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
179 
180   if (Subtarget.hasStdExtA()) {
181     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
182     setMinCmpXchgSizeInBits(32);
183   } else {
184     setMaxAtomicSizeInBitsSupported(0);
185   }
186 
187   setBooleanContents(ZeroOrOneBooleanContent);
188 
189   // Function alignments (log2).
190   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
191   setMinFunctionAlignment(FunctionAlignment);
192   setPrefFunctionAlignment(FunctionAlignment);
193 
194   // Effectively disable jump table generation.
195   setMinimumJumpTableEntries(INT_MAX);
196 }
197 
198 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
199                                             EVT VT) const {
200   if (!VT.isVector())
201     return getPointerTy(DL);
202   return VT.changeVectorElementTypeToInteger();
203 }
204 
205 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
206                                              const CallInst &I,
207                                              MachineFunction &MF,
208                                              unsigned Intrinsic) const {
209   switch (Intrinsic) {
210   default:
211     return false;
212   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
213   case Intrinsic::riscv_masked_atomicrmw_add_i32:
214   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
215   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
216   case Intrinsic::riscv_masked_atomicrmw_max_i32:
217   case Intrinsic::riscv_masked_atomicrmw_min_i32:
218   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
219   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
220   case Intrinsic::riscv_masked_cmpxchg_i32:
221     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
222     Info.opc = ISD::INTRINSIC_W_CHAIN;
223     Info.memVT = MVT::getVT(PtrTy->getElementType());
224     Info.ptrVal = I.getArgOperand(0);
225     Info.offset = 0;
226     Info.align = 4;
227     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
228                  MachineMemOperand::MOVolatile;
229     return true;
230   }
231 }
232 
233 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
234                                                 const AddrMode &AM, Type *Ty,
235                                                 unsigned AS,
236                                                 Instruction *I) const {
237   // No global is ever allowed as a base.
238   if (AM.BaseGV)
239     return false;
240 
241   // Require a 12-bit signed offset.
242   if (!isInt<12>(AM.BaseOffs))
243     return false;
244 
245   switch (AM.Scale) {
246   case 0: // "r+i" or just "i", depending on HasBaseReg.
247     break;
248   case 1:
249     if (!AM.HasBaseReg) // allow "r+i".
250       break;
251     return false; // disallow "r+r" or "r+r+i".
252   default:
253     return false;
254   }
255 
256   return true;
257 }
258 
259 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
260   return isInt<12>(Imm);
261 }
262 
263 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
264   return isInt<12>(Imm);
265 }
266 
267 // On RV32, 64-bit integers are split into their high and low parts and held
268 // in two different registers, so the trunc is free since the low register can
269 // just be used.
270 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
271   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
272     return false;
273   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
274   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
275   return (SrcBits == 64 && DestBits == 32);
276 }
277 
278 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
279   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
280       !SrcVT.isInteger() || !DstVT.isInteger())
281     return false;
282   unsigned SrcBits = SrcVT.getSizeInBits();
283   unsigned DestBits = DstVT.getSizeInBits();
284   return (SrcBits == 64 && DestBits == 32);
285 }
286 
287 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
288   // Zexts are free if they can be combined with a load.
289   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
290     EVT MemVT = LD->getMemoryVT();
291     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
292          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
293         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
294          LD->getExtensionType() == ISD::ZEXTLOAD))
295       return true;
296   }
297 
298   return TargetLowering::isZExtFree(Val, VT2);
299 }
300 
301 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
302   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
303 }
304 
305 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
306   return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
307          (VT == MVT::f64 && Subtarget.hasStdExtD());
308 }
309 
310 // Changes the condition code and swaps operands if necessary, so the SetCC
311 // operation matches one of the comparisons supported directly in the RISC-V
312 // ISA.
313 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
314   switch (CC) {
315   default:
316     break;
317   case ISD::SETGT:
318   case ISD::SETLE:
319   case ISD::SETUGT:
320   case ISD::SETULE:
321     CC = ISD::getSetCCSwappedOperands(CC);
322     std::swap(LHS, RHS);
323     break;
324   }
325 }
326 
327 // Return the RISC-V branch opcode that matches the given DAG integer
328 // condition code. The CondCode must be one of those supported by the RISC-V
329 // ISA (see normaliseSetCC).
330 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
331   switch (CC) {
332   default:
333     llvm_unreachable("Unsupported CondCode");
334   case ISD::SETEQ:
335     return RISCV::BEQ;
336   case ISD::SETNE:
337     return RISCV::BNE;
338   case ISD::SETLT:
339     return RISCV::BLT;
340   case ISD::SETGE:
341     return RISCV::BGE;
342   case ISD::SETULT:
343     return RISCV::BLTU;
344   case ISD::SETUGE:
345     return RISCV::BGEU;
346   }
347 }
348 
349 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
350                                             SelectionDAG &DAG) const {
351   switch (Op.getOpcode()) {
352   default:
353     report_fatal_error("unimplemented operand");
354   case ISD::GlobalAddress:
355     return lowerGlobalAddress(Op, DAG);
356   case ISD::BlockAddress:
357     return lowerBlockAddress(Op, DAG);
358   case ISD::ConstantPool:
359     return lowerConstantPool(Op, DAG);
360   case ISD::SELECT:
361     return lowerSELECT(Op, DAG);
362   case ISD::VASTART:
363     return lowerVASTART(Op, DAG);
364   case ISD::FRAMEADDR:
365     return lowerFRAMEADDR(Op, DAG);
366   case ISD::RETURNADDR:
367     return lowerRETURNADDR(Op, DAG);
368   case ISD::SHL_PARTS:
369     return lowerShiftLeftParts(Op, DAG);
370   case ISD::SRA_PARTS:
371     return lowerShiftRightParts(Op, DAG, true);
372   case ISD::SRL_PARTS:
373     return lowerShiftRightParts(Op, DAG, false);
374   case ISD::BITCAST: {
375     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
376            "Unexpected custom legalisation");
377     SDLoc DL(Op);
378     SDValue Op0 = Op.getOperand(0);
379     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
380       return SDValue();
381     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
382     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
383     return FPConv;
384   }
385   }
386 }
387 
388 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
389                              SelectionDAG &DAG, unsigned Flags) {
390   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
391 }
392 
393 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
394                              SelectionDAG &DAG, unsigned Flags) {
395   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
396                                    Flags);
397 }
398 
399 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
400                              SelectionDAG &DAG, unsigned Flags) {
401   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
402                                    N->getOffset(), Flags);
403 }
404 
405 template <class NodeTy>
406 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
407                                      bool IsLocal) const {
408   SDLoc DL(N);
409   EVT Ty = getPointerTy(DAG.getDataLayout());
410 
411   if (isPositionIndependent()) {
412     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
413     if (IsLocal)
414       // Use PC-relative addressing to access the symbol. This generates the
415       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
416       // %pcrel_lo(auipc)).
417       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
418 
419     // Use PC-relative addressing to access the GOT for this symbol, then load
420     // the address from the GOT. This generates the pattern (PseudoLA sym),
421     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
422     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
423   }
424 
425   switch (getTargetMachine().getCodeModel()) {
426   default:
427     report_fatal_error("Unsupported code model for lowering");
428   case CodeModel::Small: {
429     // Generate a sequence for accessing addresses within the first 2 GiB of
430     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
431     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
432     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
433     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
434     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
435   }
436   case CodeModel::Medium: {
437     // Generate a sequence for accessing addresses within any 2GiB range within
438     // the address space. This generates the pattern (PseudoLLA sym), which
439     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
440     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
441     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
442   }
443   }
444 }
445 
446 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
447                                                 SelectionDAG &DAG) const {
448   SDLoc DL(Op);
449   EVT Ty = Op.getValueType();
450   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
451   int64_t Offset = N->getOffset();
452   MVT XLenVT = Subtarget.getXLenVT();
453 
454   const GlobalValue *GV = N->getGlobal();
455   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
456   SDValue Addr = getAddr(N, DAG, IsLocal);
457 
458   // In order to maximise the opportunity for common subexpression elimination,
459   // emit a separate ADD node for the global address offset instead of folding
460   // it in the global address node. Later peephole optimisations may choose to
461   // fold it back in when profitable.
462   if (Offset != 0)
463     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
464                        DAG.getConstant(Offset, DL, XLenVT));
465   return Addr;
466 }
467 
468 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
469                                                SelectionDAG &DAG) const {
470   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
471 
472   return getAddr(N, DAG);
473 }
474 
475 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
476                                                SelectionDAG &DAG) const {
477   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
478 
479   return getAddr(N, DAG);
480 }
481 
482 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
483   SDValue CondV = Op.getOperand(0);
484   SDValue TrueV = Op.getOperand(1);
485   SDValue FalseV = Op.getOperand(2);
486   SDLoc DL(Op);
487   MVT XLenVT = Subtarget.getXLenVT();
488 
489   // If the result type is XLenVT and CondV is the output of a SETCC node
490   // which also operated on XLenVT inputs, then merge the SETCC node into the
491   // lowered RISCVISD::SELECT_CC to take advantage of the integer
492   // compare+branch instructions. i.e.:
493   // (select (setcc lhs, rhs, cc), truev, falsev)
494   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
495   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
496       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
497     SDValue LHS = CondV.getOperand(0);
498     SDValue RHS = CondV.getOperand(1);
499     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
500     ISD::CondCode CCVal = CC->get();
501 
502     normaliseSetCC(LHS, RHS, CCVal);
503 
504     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
505     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
506     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
507     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
508   }
509 
510   // Otherwise:
511   // (select condv, truev, falsev)
512   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
513   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
514   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
515 
516   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
517   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
518 
519   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
520 }
521 
522 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
523   MachineFunction &MF = DAG.getMachineFunction();
524   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
525 
526   SDLoc DL(Op);
527   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
528                                  getPointerTy(MF.getDataLayout()));
529 
530   // vastart just stores the address of the VarArgsFrameIndex slot into the
531   // memory location argument.
532   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
533   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
534                       MachinePointerInfo(SV));
535 }
536 
537 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
538                                             SelectionDAG &DAG) const {
539   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
540   MachineFunction &MF = DAG.getMachineFunction();
541   MachineFrameInfo &MFI = MF.getFrameInfo();
542   MFI.setFrameAddressIsTaken(true);
543   unsigned FrameReg = RI.getFrameRegister(MF);
544   int XLenInBytes = Subtarget.getXLen() / 8;
545 
546   EVT VT = Op.getValueType();
547   SDLoc DL(Op);
548   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
549   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
550   while (Depth--) {
551     int Offset = -(XLenInBytes * 2);
552     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
553                               DAG.getIntPtrConstant(Offset, DL));
554     FrameAddr =
555         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
556   }
557   return FrameAddr;
558 }
559 
560 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
561                                              SelectionDAG &DAG) const {
562   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
563   MachineFunction &MF = DAG.getMachineFunction();
564   MachineFrameInfo &MFI = MF.getFrameInfo();
565   MFI.setReturnAddressIsTaken(true);
566   MVT XLenVT = Subtarget.getXLenVT();
567   int XLenInBytes = Subtarget.getXLen() / 8;
568 
569   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
570     return SDValue();
571 
572   EVT VT = Op.getValueType();
573   SDLoc DL(Op);
574   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
575   if (Depth) {
576     int Off = -XLenInBytes;
577     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
578     SDValue Offset = DAG.getConstant(Off, DL, VT);
579     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
580                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
581                        MachinePointerInfo());
582   }
583 
584   // Return the value of the return address register, marking it an implicit
585   // live-in.
586   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
587   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
588 }
589 
590 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
591                                                  SelectionDAG &DAG) const {
592   SDLoc DL(Op);
593   SDValue Lo = Op.getOperand(0);
594   SDValue Hi = Op.getOperand(1);
595   SDValue Shamt = Op.getOperand(2);
596   EVT VT = Lo.getValueType();
597 
598   // if Shamt-XLEN < 0: // Shamt < XLEN
599   //   Lo = Lo << Shamt
600   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
601   // else:
602   //   Lo = 0
603   //   Hi = Lo << (Shamt-XLEN)
604 
605   SDValue Zero = DAG.getConstant(0, DL, VT);
606   SDValue One = DAG.getConstant(1, DL, VT);
607   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
608   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
609   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
610   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
611 
612   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
613   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
614   SDValue ShiftRightLo =
615       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
616   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
617   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
618   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
619 
620   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
621 
622   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
623   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
624 
625   SDValue Parts[2] = {Lo, Hi};
626   return DAG.getMergeValues(Parts, DL);
627 }
628 
629 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
630                                                   bool IsSRA) const {
631   SDLoc DL(Op);
632   SDValue Lo = Op.getOperand(0);
633   SDValue Hi = Op.getOperand(1);
634   SDValue Shamt = Op.getOperand(2);
635   EVT VT = Lo.getValueType();
636 
637   // SRA expansion:
638   //   if Shamt-XLEN < 0: // Shamt < XLEN
639   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
640   //     Hi = Hi >>s Shamt
641   //   else:
642   //     Lo = Hi >>s (Shamt-XLEN);
643   //     Hi = Hi >>s (XLEN-1)
644   //
645   // SRL expansion:
646   //   if Shamt-XLEN < 0: // Shamt < XLEN
647   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
648   //     Hi = Hi >>u Shamt
649   //   else:
650   //     Lo = Hi >>u (Shamt-XLEN);
651   //     Hi = 0;
652 
653   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
654 
655   SDValue Zero = DAG.getConstant(0, DL, VT);
656   SDValue One = DAG.getConstant(1, DL, VT);
657   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
658   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
659   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
660   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
661 
662   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
663   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
664   SDValue ShiftLeftHi =
665       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
666   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
667   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
668   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
669   SDValue HiFalse =
670       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
671 
672   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
673 
674   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
675   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
676 
677   SDValue Parts[2] = {Lo, Hi};
678   return DAG.getMergeValues(Parts, DL);
679 }
680 
681 // Returns the opcode of the target-specific SDNode that implements the 32-bit
682 // form of the given Opcode.
683 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
684   switch (Opcode) {
685   default:
686     llvm_unreachable("Unexpected opcode");
687   case ISD::SHL:
688     return RISCVISD::SLLW;
689   case ISD::SRA:
690     return RISCVISD::SRAW;
691   case ISD::SRL:
692     return RISCVISD::SRLW;
693   case ISD::SDIV:
694     return RISCVISD::DIVW;
695   case ISD::UDIV:
696     return RISCVISD::DIVUW;
697   case ISD::UREM:
698     return RISCVISD::REMUW;
699   }
700 }
701 
702 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
703 // Because i32 isn't a legal type for RV64, these operations would otherwise
704 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
705 // later one because the fact the operation was originally of type i32 is
706 // lost.
707 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
708   SDLoc DL(N);
709   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
710   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
711   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
712   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
713   // ReplaceNodeResults requires we maintain the same type for the return value.
714   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
715 }
716 
717 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
718                                              SmallVectorImpl<SDValue> &Results,
719                                              SelectionDAG &DAG) const {
720   SDLoc DL(N);
721   switch (N->getOpcode()) {
722   default:
723     llvm_unreachable("Don't know how to custom type legalize this operation!");
724   case ISD::SHL:
725   case ISD::SRA:
726   case ISD::SRL:
727     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
728            "Unexpected custom legalisation");
729     if (N->getOperand(1).getOpcode() == ISD::Constant)
730       return;
731     Results.push_back(customLegalizeToWOp(N, DAG));
732     break;
733   case ISD::SDIV:
734   case ISD::UDIV:
735   case ISD::UREM:
736     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
737            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
738     if (N->getOperand(0).getOpcode() == ISD::Constant ||
739         N->getOperand(1).getOpcode() == ISD::Constant)
740       return;
741     Results.push_back(customLegalizeToWOp(N, DAG));
742     break;
743   case ISD::BITCAST: {
744     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
745            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
746     SDLoc DL(N);
747     SDValue Op0 = N->getOperand(0);
748     if (Op0.getValueType() != MVT::f32)
749       return;
750     SDValue FPConv =
751         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
752     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
753     break;
754   }
755   }
756 }
757 
758 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
759                                                DAGCombinerInfo &DCI) const {
760   SelectionDAG &DAG = DCI.DAG;
761 
762   switch (N->getOpcode()) {
763   default:
764     break;
765   case RISCVISD::SplitF64: {
766     SDValue Op0 = N->getOperand(0);
767     // If the input to SplitF64 is just BuildPairF64 then the operation is
768     // redundant. Instead, use BuildPairF64's operands directly.
769     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
770       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
771 
772     SDLoc DL(N);
773 
774     // It's cheaper to materialise two 32-bit integers than to load a double
775     // from the constant pool and transfer it to integer registers through the
776     // stack.
777     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
778       APInt V = C->getValueAPF().bitcastToAPInt();
779       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
780       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
781       return DCI.CombineTo(N, Lo, Hi);
782     }
783 
784     // This is a target-specific version of a DAGCombine performed in
785     // DAGCombiner::visitBITCAST. It performs the equivalent of:
786     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
787     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
788     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
789         !Op0.getNode()->hasOneUse())
790       break;
791     SDValue NewSplitF64 =
792         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
793                     Op0.getOperand(0));
794     SDValue Lo = NewSplitF64.getValue(0);
795     SDValue Hi = NewSplitF64.getValue(1);
796     APInt SignBit = APInt::getSignMask(32);
797     if (Op0.getOpcode() == ISD::FNEG) {
798       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
799                                   DAG.getConstant(SignBit, DL, MVT::i32));
800       return DCI.CombineTo(N, Lo, NewHi);
801     }
802     assert(Op0.getOpcode() == ISD::FABS);
803     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
804                                 DAG.getConstant(~SignBit, DL, MVT::i32));
805     return DCI.CombineTo(N, Lo, NewHi);
806   }
807   case RISCVISD::SLLW:
808   case RISCVISD::SRAW:
809   case RISCVISD::SRLW: {
810     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
811     SDValue LHS = N->getOperand(0);
812     SDValue RHS = N->getOperand(1);
813     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
814     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
815     if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
816         (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
817       return SDValue();
818     break;
819   }
820   case RISCVISD::FMV_X_ANYEXTW_RV64: {
821     SDLoc DL(N);
822     SDValue Op0 = N->getOperand(0);
823     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
824     // conversion is unnecessary and can be replaced with an ANY_EXTEND
825     // of the FMV_W_X_RV64 operand.
826     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
827       SDValue AExtOp =
828           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
829       return DCI.CombineTo(N, AExtOp);
830     }
831 
832     // This is a target-specific version of a DAGCombine performed in
833     // DAGCombiner::visitBITCAST. It performs the equivalent of:
834     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
835     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
836     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
837         !Op0.getNode()->hasOneUse())
838       break;
839     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
840                                  Op0.getOperand(0));
841     APInt SignBit = APInt::getSignMask(32).sext(64);
842     if (Op0.getOpcode() == ISD::FNEG) {
843       return DCI.CombineTo(N,
844                            DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
845                                        DAG.getConstant(SignBit, DL, MVT::i64)));
846     }
847     assert(Op0.getOpcode() == ISD::FABS);
848     return DCI.CombineTo(N,
849                          DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
850                                      DAG.getConstant(~SignBit, DL, MVT::i64)));
851   }
852   }
853 
854   return SDValue();
855 }
856 
857 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
858     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
859     unsigned Depth) const {
860   switch (Op.getOpcode()) {
861   default:
862     break;
863   case RISCVISD::SLLW:
864   case RISCVISD::SRAW:
865   case RISCVISD::SRLW:
866   case RISCVISD::DIVW:
867   case RISCVISD::DIVUW:
868   case RISCVISD::REMUW:
869     // TODO: As the result is sign-extended, this is conservatively correct. A
870     // more precise answer could be calculated for SRAW depending on known
871     // bits in the shift amount.
872     return 33;
873   }
874 
875   return 1;
876 }
877 
878 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
879                                              MachineBasicBlock *BB) {
880   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
881 
882   MachineFunction &MF = *BB->getParent();
883   DebugLoc DL = MI.getDebugLoc();
884   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
885   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
886   unsigned LoReg = MI.getOperand(0).getReg();
887   unsigned HiReg = MI.getOperand(1).getReg();
888   unsigned SrcReg = MI.getOperand(2).getReg();
889   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
890   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
891 
892   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
893                           RI);
894   MachineMemOperand *MMO =
895       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
896                               MachineMemOperand::MOLoad, 8, 8);
897   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
898       .addFrameIndex(FI)
899       .addImm(0)
900       .addMemOperand(MMO);
901   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
902       .addFrameIndex(FI)
903       .addImm(4)
904       .addMemOperand(MMO);
905   MI.eraseFromParent(); // The pseudo instruction is gone now.
906   return BB;
907 }
908 
909 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
910                                                  MachineBasicBlock *BB) {
911   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
912          "Unexpected instruction");
913 
914   MachineFunction &MF = *BB->getParent();
915   DebugLoc DL = MI.getDebugLoc();
916   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
917   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
918   unsigned DstReg = MI.getOperand(0).getReg();
919   unsigned LoReg = MI.getOperand(1).getReg();
920   unsigned HiReg = MI.getOperand(2).getReg();
921   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
922   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
923 
924   MachineMemOperand *MMO =
925       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
926                               MachineMemOperand::MOStore, 8, 8);
927   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
928       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
929       .addFrameIndex(FI)
930       .addImm(0)
931       .addMemOperand(MMO);
932   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
933       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
934       .addFrameIndex(FI)
935       .addImm(4)
936       .addMemOperand(MMO);
937   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
938   MI.eraseFromParent(); // The pseudo instruction is gone now.
939   return BB;
940 }
941 
942 static bool isSelectPseudo(MachineInstr &MI) {
943   switch (MI.getOpcode()) {
944   default:
945     return false;
946   case RISCV::Select_GPR_Using_CC_GPR:
947   case RISCV::Select_FPR32_Using_CC_GPR:
948   case RISCV::Select_FPR64_Using_CC_GPR:
949     return true;
950   }
951 }
952 
953 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
954                                            MachineBasicBlock *BB) {
955   // To "insert" Select_* instructions, we actually have to insert the triangle
956   // control-flow pattern.  The incoming instructions know the destination vreg
957   // to set, the condition code register to branch on, the true/false values to
958   // select between, and the condcode to use to select the appropriate branch.
959   //
960   // We produce the following control flow:
961   //     HeadMBB
962   //     |  \
963   //     |  IfFalseMBB
964   //     | /
965   //    TailMBB
966   //
967   // When we find a sequence of selects we attempt to optimize their emission
968   // by sharing the control flow. Currently we only handle cases where we have
969   // multiple selects with the exact same condition (same LHS, RHS and CC).
970   // The selects may be interleaved with other instructions if the other
971   // instructions meet some requirements we deem safe:
972   // - They are debug instructions. Otherwise,
973   // - They do not have side-effects, do not access memory and their inputs do
974   //   not depend on the results of the select pseudo-instructions.
975   // The TrueV/FalseV operands of the selects cannot depend on the result of
976   // previous selects in the sequence.
977   // These conditions could be further relaxed. See the X86 target for a
978   // related approach and more information.
979   unsigned LHS = MI.getOperand(1).getReg();
980   unsigned RHS = MI.getOperand(2).getReg();
981   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
982 
983   SmallVector<MachineInstr *, 4> SelectDebugValues;
984   SmallSet<unsigned, 4> SelectDests;
985   SelectDests.insert(MI.getOperand(0).getReg());
986 
987   MachineInstr *LastSelectPseudo = &MI;
988 
989   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
990        SequenceMBBI != E; ++SequenceMBBI) {
991     if (SequenceMBBI->isDebugInstr())
992       continue;
993     else if (isSelectPseudo(*SequenceMBBI)) {
994       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
995           SequenceMBBI->getOperand(2).getReg() != RHS ||
996           SequenceMBBI->getOperand(3).getImm() != CC ||
997           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
998           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
999         break;
1000       LastSelectPseudo = &*SequenceMBBI;
1001       SequenceMBBI->collectDebugValues(SelectDebugValues);
1002       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
1003     } else {
1004       if (SequenceMBBI->hasUnmodeledSideEffects() ||
1005           SequenceMBBI->mayLoadOrStore())
1006         break;
1007       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
1008             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
1009           }))
1010         break;
1011     }
1012   }
1013 
1014   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1015   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1016   DebugLoc DL = MI.getDebugLoc();
1017   MachineFunction::iterator I = ++BB->getIterator();
1018 
1019   MachineBasicBlock *HeadMBB = BB;
1020   MachineFunction *F = BB->getParent();
1021   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
1022   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
1023 
1024   F->insert(I, IfFalseMBB);
1025   F->insert(I, TailMBB);
1026 
1027   // Transfer debug instructions associated with the selects to TailMBB.
1028   for (MachineInstr *DebugInstr : SelectDebugValues) {
1029     TailMBB->push_back(DebugInstr->removeFromParent());
1030   }
1031 
1032   // Move all instructions after the sequence to TailMBB.
1033   TailMBB->splice(TailMBB->end(), HeadMBB,
1034                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
1035   // Update machine-CFG edges by transferring all successors of the current
1036   // block to the new block which will contain the Phi nodes for the selects.
1037   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
1038   // Set the successors for HeadMBB.
1039   HeadMBB->addSuccessor(IfFalseMBB);
1040   HeadMBB->addSuccessor(TailMBB);
1041 
1042   // Insert appropriate branch.
1043   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
1044 
1045   BuildMI(HeadMBB, DL, TII.get(Opcode))
1046     .addReg(LHS)
1047     .addReg(RHS)
1048     .addMBB(TailMBB);
1049 
1050   // IfFalseMBB just falls through to TailMBB.
1051   IfFalseMBB->addSuccessor(TailMBB);
1052 
1053   // Create PHIs for all of the select pseudo-instructions.
1054   auto SelectMBBI = MI.getIterator();
1055   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
1056   auto InsertionPoint = TailMBB->begin();
1057   while (SelectMBBI != SelectEnd) {
1058     auto Next = std::next(SelectMBBI);
1059     if (isSelectPseudo(*SelectMBBI)) {
1060       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
1061       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
1062               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1063           .addReg(SelectMBBI->getOperand(4).getReg())
1064           .addMBB(HeadMBB)
1065           .addReg(SelectMBBI->getOperand(5).getReg())
1066           .addMBB(IfFalseMBB);
1067       SelectMBBI->eraseFromParent();
1068     }
1069     SelectMBBI = Next;
1070   }
1071 
1072   return TailMBB;
1073 }
1074 
1075 MachineBasicBlock *
1076 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1077                                                  MachineBasicBlock *BB) const {
1078   switch (MI.getOpcode()) {
1079   default:
1080     llvm_unreachable("Unexpected instr type to insert");
1081   case RISCV::Select_GPR_Using_CC_GPR:
1082   case RISCV::Select_FPR32_Using_CC_GPR:
1083   case RISCV::Select_FPR64_Using_CC_GPR:
1084     return emitSelectPseudo(MI, BB);
1085   case RISCV::BuildPairF64Pseudo:
1086     return emitBuildPairF64Pseudo(MI, BB);
1087   case RISCV::SplitF64Pseudo:
1088     return emitSplitF64Pseudo(MI, BB);
1089   }
1090 }
1091 
1092 // Calling Convention Implementation.
1093 // The expectations for frontend ABI lowering vary from target to target.
1094 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
1095 // details, but this is a longer term goal. For now, we simply try to keep the
1096 // role of the frontend as simple and well-defined as possible. The rules can
1097 // be summarised as:
1098 // * Never split up large scalar arguments. We handle them here.
1099 // * If a hardfloat calling convention is being used, and the struct may be
1100 // passed in a pair of registers (fp+fp, int+fp), and both registers are
1101 // available, then pass as two separate arguments. If either the GPRs or FPRs
1102 // are exhausted, then pass according to the rule below.
1103 // * If a struct could never be passed in registers or directly in a stack
1104 // slot (as it is larger than 2*XLEN and the floating point rules don't
1105 // apply), then pass it using a pointer with the byval attribute.
1106 // * If a struct is less than 2*XLEN, then coerce to either a two-element
1107 // word-sized array or a 2*XLEN scalar (depending on alignment).
1108 // * The frontend can determine whether a struct is returned by reference or
1109 // not based on its size and fields. If it will be returned by reference, the
1110 // frontend must modify the prototype so a pointer with the sret annotation is
1111 // passed as the first argument. This is not necessary for large scalar
1112 // returns.
1113 // * Struct return values and varargs should be coerced to structs containing
1114 // register-size fields in the same situations they would be for fixed
1115 // arguments.
1116 
1117 static const MCPhysReg ArgGPRs[] = {
1118   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
1119   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
1120 };
1121 static const MCPhysReg ArgFPR32s[] = {
1122   RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
1123   RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32
1124 };
1125 static const MCPhysReg ArgFPR64s[] = {
1126   RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
1127   RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64
1128 };
1129 
1130 // Pass a 2*XLEN argument that has been split into two XLEN values through
1131 // registers or the stack as necessary.
1132 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
1133                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
1134                                 MVT ValVT2, MVT LocVT2,
1135                                 ISD::ArgFlagsTy ArgFlags2) {
1136   unsigned XLenInBytes = XLen / 8;
1137   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
1138     // At least one half can be passed via register.
1139     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1140                                      VA1.getLocVT(), CCValAssign::Full));
1141   } else {
1142     // Both halves must be passed on the stack, with proper alignment.
1143     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
1144     State.addLoc(
1145         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1146                             State.AllocateStack(XLenInBytes, StackAlign),
1147                             VA1.getLocVT(), CCValAssign::Full));
1148     State.addLoc(CCValAssign::getMem(
1149         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1150         CCValAssign::Full));
1151     return false;
1152   }
1153 
1154   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
1155     // The second half can also be passed via register.
1156     State.addLoc(
1157         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1158   } else {
1159     // The second half is passed via the stack, without additional alignment.
1160     State.addLoc(CCValAssign::getMem(
1161         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1162         CCValAssign::Full));
1163   }
1164 
1165   return false;
1166 }
1167 
1168 // Implements the RISC-V calling convention. Returns true upon failure.
1169 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1170                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1171                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1172                      bool IsRet, Type *OrigTy) {
1173   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
1174   assert(XLen == 32 || XLen == 64);
1175   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
1176 
1177   // Any return value split in to more than two values can't be returned
1178   // directly.
1179   if (IsRet && ValNo > 1)
1180     return true;
1181 
1182   // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
1183   // variadic argument, or if no F32 argument registers are available.
1184   bool UseGPRForF32 = true;
1185   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
1186   // variadic argument, or if no F64 argument registers are available.
1187   bool UseGPRForF64 = true;
1188 
1189   switch (ABI) {
1190   default:
1191     llvm_unreachable("Unexpected ABI");
1192   case RISCVABI::ABI_ILP32:
1193   case RISCVABI::ABI_LP64:
1194     break;
1195   case RISCVABI::ABI_ILP32F:
1196   case RISCVABI::ABI_LP64F:
1197     UseGPRForF32 = !IsFixed;
1198     break;
1199   case RISCVABI::ABI_ILP32D:
1200   case RISCVABI::ABI_LP64D:
1201     UseGPRForF32 = !IsFixed;
1202     UseGPRForF64 = !IsFixed;
1203     break;
1204   }
1205 
1206   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
1207     UseGPRForF32 = true;
1208   if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
1209     UseGPRForF64 = true;
1210 
1211   // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
1212   // variables rather than directly checking against the target ABI.
1213 
1214   if (UseGPRForF32 && ValVT == MVT::f32) {
1215     LocVT = XLenVT;
1216     LocInfo = CCValAssign::BCvt;
1217   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
1218     LocVT = MVT::i64;
1219     LocInfo = CCValAssign::BCvt;
1220   }
1221 
1222   // If this is a variadic argument, the RISC-V calling convention requires
1223   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
1224   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
1225   // be used regardless of whether the original argument was split during
1226   // legalisation or not. The argument will not be passed by registers if the
1227   // original type is larger than 2*XLEN, so the register alignment rule does
1228   // not apply.
1229   unsigned TwoXLenInBytes = (2 * XLen) / 8;
1230   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
1231       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1232     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1233     // Skip 'odd' register if necessary.
1234     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
1235       State.AllocateReg(ArgGPRs);
1236   }
1237 
1238   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1239   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1240       State.getPendingArgFlags();
1241 
1242   assert(PendingLocs.size() == PendingArgFlags.size() &&
1243          "PendingLocs and PendingArgFlags out of sync");
1244 
1245   // Handle passing f64 on RV32D with a soft float ABI or when floating point
1246   // registers are exhausted.
1247   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1248     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1249            "Can't lower f64 if it is split");
1250     // Depending on available argument GPRS, f64 may be passed in a pair of
1251     // GPRs, split between a GPR and the stack, or passed completely on the
1252     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
1253     // cases.
1254     unsigned Reg = State.AllocateReg(ArgGPRs);
1255     LocVT = MVT::i32;
1256     if (!Reg) {
1257       unsigned StackOffset = State.AllocateStack(8, 8);
1258       State.addLoc(
1259           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1260       return false;
1261     }
1262     if (!State.AllocateReg(ArgGPRs))
1263       State.AllocateStack(4, 4);
1264     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1265     return false;
1266   }
1267 
1268   // Split arguments might be passed indirectly, so keep track of the pending
1269   // values.
1270   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
1271     LocVT = XLenVT;
1272     LocInfo = CCValAssign::Indirect;
1273     PendingLocs.push_back(
1274         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1275     PendingArgFlags.push_back(ArgFlags);
1276     if (!ArgFlags.isSplitEnd()) {
1277       return false;
1278     }
1279   }
1280 
1281   // If the split argument only had two elements, it should be passed directly
1282   // in registers or on the stack.
1283   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1284     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1285     // Apply the normal calling convention rules to the first half of the
1286     // split argument.
1287     CCValAssign VA = PendingLocs[0];
1288     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1289     PendingLocs.clear();
1290     PendingArgFlags.clear();
1291     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1292                                ArgFlags);
1293   }
1294 
1295   // Allocate to a register if possible, or else a stack slot.
1296   unsigned Reg;
1297   if (ValVT == MVT::f32 && !UseGPRForF32)
1298     Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
1299   else if (ValVT == MVT::f64 && !UseGPRForF64)
1300     Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
1301   else
1302     Reg = State.AllocateReg(ArgGPRs);
1303   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
1304 
1305   // If we reach this point and PendingLocs is non-empty, we must be at the
1306   // end of a split argument that must be passed indirectly.
1307   if (!PendingLocs.empty()) {
1308     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1309     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1310 
1311     for (auto &It : PendingLocs) {
1312       if (Reg)
1313         It.convertToReg(Reg);
1314       else
1315         It.convertToMem(StackOffset);
1316       State.addLoc(It);
1317     }
1318     PendingLocs.clear();
1319     PendingArgFlags.clear();
1320     return false;
1321   }
1322 
1323   assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
1324          "Expected an XLenVT at this stage");
1325 
1326   if (Reg) {
1327     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1328     return false;
1329   }
1330 
1331   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1332   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1333     LocVT = ValVT;
1334     LocInfo = CCValAssign::Full;
1335   }
1336   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1337   return false;
1338 }
1339 
1340 void RISCVTargetLowering::analyzeInputArgs(
1341     MachineFunction &MF, CCState &CCInfo,
1342     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1343   unsigned NumArgs = Ins.size();
1344   FunctionType *FType = MF.getFunction().getFunctionType();
1345 
1346   for (unsigned i = 0; i != NumArgs; ++i) {
1347     MVT ArgVT = Ins[i].VT;
1348     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1349 
1350     Type *ArgTy = nullptr;
1351     if (IsRet)
1352       ArgTy = FType->getReturnType();
1353     else if (Ins[i].isOrigArg())
1354       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1355 
1356     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1357     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1358                  ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
1359       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1360                         << EVT(ArgVT).getEVTString() << '\n');
1361       llvm_unreachable(nullptr);
1362     }
1363   }
1364 }
1365 
1366 void RISCVTargetLowering::analyzeOutputArgs(
1367     MachineFunction &MF, CCState &CCInfo,
1368     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1369     CallLoweringInfo *CLI) const {
1370   unsigned NumArgs = Outs.size();
1371 
1372   for (unsigned i = 0; i != NumArgs; i++) {
1373     MVT ArgVT = Outs[i].VT;
1374     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1375     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1376 
1377     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1378     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1379                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1380       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1381                         << EVT(ArgVT).getEVTString() << "\n");
1382       llvm_unreachable(nullptr);
1383     }
1384   }
1385 }
1386 
1387 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1388 // values.
1389 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1390                                    const CCValAssign &VA, const SDLoc &DL) {
1391   switch (VA.getLocInfo()) {
1392   default:
1393     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1394   case CCValAssign::Full:
1395     break;
1396   case CCValAssign::BCvt:
1397     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1398       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1399       break;
1400     }
1401     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1402     break;
1403   }
1404   return Val;
1405 }
1406 
1407 // The caller is responsible for loading the full value if the argument is
1408 // passed with CCValAssign::Indirect.
1409 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1410                                 const CCValAssign &VA, const SDLoc &DL) {
1411   MachineFunction &MF = DAG.getMachineFunction();
1412   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1413   EVT LocVT = VA.getLocVT();
1414   SDValue Val;
1415   const TargetRegisterClass *RC;
1416 
1417   switch (LocVT.getSimpleVT().SimpleTy) {
1418   default:
1419     llvm_unreachable("Unexpected register type");
1420   case MVT::i32:
1421   case MVT::i64:
1422     RC = &RISCV::GPRRegClass;
1423     break;
1424   case MVT::f32:
1425     RC = &RISCV::FPR32RegClass;
1426     break;
1427   case MVT::f64:
1428     RC = &RISCV::FPR64RegClass;
1429     break;
1430   }
1431 
1432   unsigned VReg = RegInfo.createVirtualRegister(RC);
1433   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1434   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1435 
1436   if (VA.getLocInfo() == CCValAssign::Indirect)
1437     return Val;
1438 
1439   return convertLocVTToValVT(DAG, Val, VA, DL);
1440 }
1441 
1442 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1443                                    const CCValAssign &VA, const SDLoc &DL) {
1444   EVT LocVT = VA.getLocVT();
1445 
1446   switch (VA.getLocInfo()) {
1447   default:
1448     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1449   case CCValAssign::Full:
1450     break;
1451   case CCValAssign::BCvt:
1452     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1453       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1454       break;
1455     }
1456     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1457     break;
1458   }
1459   return Val;
1460 }
1461 
1462 // The caller is responsible for loading the full value if the argument is
1463 // passed with CCValAssign::Indirect.
1464 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1465                                 const CCValAssign &VA, const SDLoc &DL) {
1466   MachineFunction &MF = DAG.getMachineFunction();
1467   MachineFrameInfo &MFI = MF.getFrameInfo();
1468   EVT LocVT = VA.getLocVT();
1469   EVT ValVT = VA.getValVT();
1470   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1471   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1472                                  VA.getLocMemOffset(), /*Immutable=*/true);
1473   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1474   SDValue Val;
1475 
1476   ISD::LoadExtType ExtType;
1477   switch (VA.getLocInfo()) {
1478   default:
1479     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1480   case CCValAssign::Full:
1481   case CCValAssign::Indirect:
1482   case CCValAssign::BCvt:
1483     ExtType = ISD::NON_EXTLOAD;
1484     break;
1485   }
1486   Val = DAG.getExtLoad(
1487       ExtType, DL, LocVT, Chain, FIN,
1488       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1489   return Val;
1490 }
1491 
1492 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1493                                        const CCValAssign &VA, const SDLoc &DL) {
1494   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1495          "Unexpected VA");
1496   MachineFunction &MF = DAG.getMachineFunction();
1497   MachineFrameInfo &MFI = MF.getFrameInfo();
1498   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1499 
1500   if (VA.isMemLoc()) {
1501     // f64 is passed on the stack.
1502     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1503     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1504     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1505                        MachinePointerInfo::getFixedStack(MF, FI));
1506   }
1507 
1508   assert(VA.isRegLoc() && "Expected register VA assignment");
1509 
1510   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1511   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1512   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1513   SDValue Hi;
1514   if (VA.getLocReg() == RISCV::X17) {
1515     // Second half of f64 is passed on the stack.
1516     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1517     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1518     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1519                      MachinePointerInfo::getFixedStack(MF, FI));
1520   } else {
1521     // Second half of f64 is passed in another GPR.
1522     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1523     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1524     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1525   }
1526   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1527 }
1528 
1529 // Transform physical registers into virtual registers.
1530 SDValue RISCVTargetLowering::LowerFormalArguments(
1531     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1532     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1533     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1534 
1535   switch (CallConv) {
1536   default:
1537     report_fatal_error("Unsupported calling convention");
1538   case CallingConv::C:
1539   case CallingConv::Fast:
1540     break;
1541   }
1542 
1543   MachineFunction &MF = DAG.getMachineFunction();
1544 
1545   const Function &Func = MF.getFunction();
1546   if (Func.hasFnAttribute("interrupt")) {
1547     if (!Func.arg_empty())
1548       report_fatal_error(
1549         "Functions with the interrupt attribute cannot have arguments!");
1550 
1551     StringRef Kind =
1552       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1553 
1554     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1555       report_fatal_error(
1556         "Function interrupt attribute argument not supported!");
1557   }
1558 
1559   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1560   MVT XLenVT = Subtarget.getXLenVT();
1561   unsigned XLenInBytes = Subtarget.getXLen() / 8;
1562   // Used with vargs to acumulate store chains.
1563   std::vector<SDValue> OutChains;
1564 
1565   // Assign locations to all of the incoming arguments.
1566   SmallVector<CCValAssign, 16> ArgLocs;
1567   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1568   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1569 
1570   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1571     CCValAssign &VA = ArgLocs[i];
1572     SDValue ArgValue;
1573     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1574     // case.
1575     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1576       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1577     else if (VA.isRegLoc())
1578       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1579     else
1580       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1581 
1582     if (VA.getLocInfo() == CCValAssign::Indirect) {
1583       // If the original argument was split and passed by reference (e.g. i128
1584       // on RV32), we need to load all parts of it here (using the same
1585       // address).
1586       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1587                                    MachinePointerInfo()));
1588       unsigned ArgIndex = Ins[i].OrigArgIndex;
1589       assert(Ins[i].PartOffset == 0);
1590       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1591         CCValAssign &PartVA = ArgLocs[i + 1];
1592         unsigned PartOffset = Ins[i + 1].PartOffset;
1593         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1594                                       DAG.getIntPtrConstant(PartOffset, DL));
1595         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1596                                      MachinePointerInfo()));
1597         ++i;
1598       }
1599       continue;
1600     }
1601     InVals.push_back(ArgValue);
1602   }
1603 
1604   if (IsVarArg) {
1605     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1606     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1607     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1608     MachineFrameInfo &MFI = MF.getFrameInfo();
1609     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1610     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1611 
1612     // Offset of the first variable argument from stack pointer, and size of
1613     // the vararg save area. For now, the varargs save area is either zero or
1614     // large enough to hold a0-a7.
1615     int VaArgOffset, VarArgsSaveSize;
1616 
1617     // If all registers are allocated, then all varargs must be passed on the
1618     // stack and we don't need to save any argregs.
1619     if (ArgRegs.size() == Idx) {
1620       VaArgOffset = CCInfo.getNextStackOffset();
1621       VarArgsSaveSize = 0;
1622     } else {
1623       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1624       VaArgOffset = -VarArgsSaveSize;
1625     }
1626 
1627     // Record the frame index of the first variable argument
1628     // which is a value necessary to VASTART.
1629     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1630     RVFI->setVarArgsFrameIndex(FI);
1631 
1632     // If saving an odd number of registers then create an extra stack slot to
1633     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
1634     // offsets to even-numbered registered remain 2*XLEN-aligned.
1635     if (Idx % 2) {
1636       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1637                                  true);
1638       VarArgsSaveSize += XLenInBytes;
1639     }
1640 
1641     // Copy the integer registers that may have been used for passing varargs
1642     // to the vararg save area.
1643     for (unsigned I = Idx; I < ArgRegs.size();
1644          ++I, VaArgOffset += XLenInBytes) {
1645       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1646       RegInfo.addLiveIn(ArgRegs[I], Reg);
1647       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1648       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1649       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1650       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1651                                    MachinePointerInfo::getFixedStack(MF, FI));
1652       cast<StoreSDNode>(Store.getNode())
1653           ->getMemOperand()
1654           ->setValue((Value *)nullptr);
1655       OutChains.push_back(Store);
1656     }
1657     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1658   }
1659 
1660   // All stores are grouped in one node to allow the matching between
1661   // the size of Ins and InVals. This only happens for vararg functions.
1662   if (!OutChains.empty()) {
1663     OutChains.push_back(Chain);
1664     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1665   }
1666 
1667   return Chain;
1668 }
1669 
1670 /// isEligibleForTailCallOptimization - Check whether the call is eligible
1671 /// for tail call optimization.
1672 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1673 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
1674     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1675     const SmallVector<CCValAssign, 16> &ArgLocs) const {
1676 
1677   auto &Callee = CLI.Callee;
1678   auto CalleeCC = CLI.CallConv;
1679   auto IsVarArg = CLI.IsVarArg;
1680   auto &Outs = CLI.Outs;
1681   auto &Caller = MF.getFunction();
1682   auto CallerCC = Caller.getCallingConv();
1683 
1684   // Do not tail call opt functions with "disable-tail-calls" attribute.
1685   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1686     return false;
1687 
1688   // Exception-handling functions need a special set of instructions to
1689   // indicate a return to the hardware. Tail-calling another function would
1690   // probably break this.
1691   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1692   // should be expanded as new function attributes are introduced.
1693   if (Caller.hasFnAttribute("interrupt"))
1694     return false;
1695 
1696   // Do not tail call opt functions with varargs.
1697   if (IsVarArg)
1698     return false;
1699 
1700   // Do not tail call opt if the stack is used to pass parameters.
1701   if (CCInfo.getNextStackOffset() != 0)
1702     return false;
1703 
1704   // Do not tail call opt if any parameters need to be passed indirectly.
1705   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1706   // passed indirectly. So the address of the value will be passed in a
1707   // register, or if not available, then the address is put on the stack. In
1708   // order to pass indirectly, space on the stack often needs to be allocated
1709   // in order to store the value. In this case the CCInfo.getNextStackOffset()
1710   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
1711   // are passed CCValAssign::Indirect.
1712   for (auto &VA : ArgLocs)
1713     if (VA.getLocInfo() == CCValAssign::Indirect)
1714       return false;
1715 
1716   // Do not tail call opt if either caller or callee uses struct return
1717   // semantics.
1718   auto IsCallerStructRet = Caller.hasStructRetAttr();
1719   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1720   if (IsCallerStructRet || IsCalleeStructRet)
1721     return false;
1722 
1723   // Externally-defined functions with weak linkage should not be
1724   // tail-called. The behaviour of branch instructions in this situation (as
1725   // used for tail calls) is implementation-defined, so we cannot rely on the
1726   // linker replacing the tail call with a return.
1727   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1728     const GlobalValue *GV = G->getGlobal();
1729     if (GV->hasExternalWeakLinkage())
1730       return false;
1731   }
1732 
1733   // The callee has to preserve all registers the caller needs to preserve.
1734   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1735   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1736   if (CalleeCC != CallerCC) {
1737     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1738     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1739       return false;
1740   }
1741 
1742   // Byval parameters hand the function a pointer directly into the stack area
1743   // we want to reuse during a tail call. Working around this *is* possible
1744   // but less efficient and uglier in LowerCall.
1745   for (auto &Arg : Outs)
1746     if (Arg.Flags.isByVal())
1747       return false;
1748 
1749   return true;
1750 }
1751 
1752 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
1753 // and output parameter nodes.
1754 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1755                                        SmallVectorImpl<SDValue> &InVals) const {
1756   SelectionDAG &DAG = CLI.DAG;
1757   SDLoc &DL = CLI.DL;
1758   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1759   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1760   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1761   SDValue Chain = CLI.Chain;
1762   SDValue Callee = CLI.Callee;
1763   bool &IsTailCall = CLI.IsTailCall;
1764   CallingConv::ID CallConv = CLI.CallConv;
1765   bool IsVarArg = CLI.IsVarArg;
1766   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1767   MVT XLenVT = Subtarget.getXLenVT();
1768 
1769   MachineFunction &MF = DAG.getMachineFunction();
1770 
1771   // Analyze the operands of the call, assigning locations to each operand.
1772   SmallVector<CCValAssign, 16> ArgLocs;
1773   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1774   analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
1775 
1776   // Check if it's really possible to do a tail call.
1777   if (IsTailCall)
1778     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
1779 
1780   if (IsTailCall)
1781     ++NumTailCalls;
1782   else if (CLI.CS && CLI.CS.isMustTailCall())
1783     report_fatal_error("failed to perform tail call elimination on a call "
1784                        "site marked musttail");
1785 
1786   // Get a count of how many bytes are to be pushed on the stack.
1787   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1788 
1789   // Create local copies for byval args
1790   SmallVector<SDValue, 8> ByValArgs;
1791   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1792     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1793     if (!Flags.isByVal())
1794       continue;
1795 
1796     SDValue Arg = OutVals[i];
1797     unsigned Size = Flags.getByValSize();
1798     unsigned Align = Flags.getByValAlign();
1799 
1800     int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
1801     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1802     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
1803 
1804     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
1805                           /*IsVolatile=*/false,
1806                           /*AlwaysInline=*/false,
1807                           IsTailCall, MachinePointerInfo(),
1808                           MachinePointerInfo());
1809     ByValArgs.push_back(FIPtr);
1810   }
1811 
1812   if (!IsTailCall)
1813     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
1814 
1815   // Copy argument values to their designated locations.
1816   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1817   SmallVector<SDValue, 8> MemOpChains;
1818   SDValue StackPtr;
1819   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
1820     CCValAssign &VA = ArgLocs[i];
1821     SDValue ArgValue = OutVals[i];
1822     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1823 
1824     // Handle passing f64 on RV32D with a soft float ABI as a special case.
1825     bool IsF64OnRV32DSoftABI =
1826         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
1827     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
1828       SDValue SplitF64 = DAG.getNode(
1829           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
1830       SDValue Lo = SplitF64.getValue(0);
1831       SDValue Hi = SplitF64.getValue(1);
1832 
1833       unsigned RegLo = VA.getLocReg();
1834       RegsToPass.push_back(std::make_pair(RegLo, Lo));
1835 
1836       if (RegLo == RISCV::X17) {
1837         // Second half of f64 is passed on the stack.
1838         // Work out the address of the stack slot.
1839         if (!StackPtr.getNode())
1840           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1841         // Emit the store.
1842         MemOpChains.push_back(
1843             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
1844       } else {
1845         // Second half of f64 is passed in another GPR.
1846         unsigned RegHigh = RegLo + 1;
1847         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
1848       }
1849       continue;
1850     }
1851 
1852     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
1853     // as any other MemLoc.
1854 
1855     // Promote the value if needed.
1856     // For now, only handle fully promoted and indirect arguments.
1857     if (VA.getLocInfo() == CCValAssign::Indirect) {
1858       // Store the argument in a stack slot and pass its address.
1859       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
1860       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1861       MemOpChains.push_back(
1862           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1863                        MachinePointerInfo::getFixedStack(MF, FI)));
1864       // If the original argument was split (e.g. i128), we need
1865       // to store all parts of it here (and pass just one address).
1866       unsigned ArgIndex = Outs[i].OrigArgIndex;
1867       assert(Outs[i].PartOffset == 0);
1868       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
1869         SDValue PartValue = OutVals[i + 1];
1870         unsigned PartOffset = Outs[i + 1].PartOffset;
1871         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1872                                       DAG.getIntPtrConstant(PartOffset, DL));
1873         MemOpChains.push_back(
1874             DAG.getStore(Chain, DL, PartValue, Address,
1875                          MachinePointerInfo::getFixedStack(MF, FI)));
1876         ++i;
1877       }
1878       ArgValue = SpillSlot;
1879     } else {
1880       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
1881     }
1882 
1883     // Use local copy if it is a byval arg.
1884     if (Flags.isByVal())
1885       ArgValue = ByValArgs[j++];
1886 
1887     if (VA.isRegLoc()) {
1888       // Queue up the argument copies and emit them at the end.
1889       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1890     } else {
1891       assert(VA.isMemLoc() && "Argument not register or memory");
1892       assert(!IsTailCall && "Tail call not allowed if stack is used "
1893                             "for passing parameters");
1894 
1895       // Work out the address of the stack slot.
1896       if (!StackPtr.getNode())
1897         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1898       SDValue Address =
1899           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1900                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
1901 
1902       // Emit the store.
1903       MemOpChains.push_back(
1904           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1905     }
1906   }
1907 
1908   // Join the stores, which are independent of one another.
1909   if (!MemOpChains.empty())
1910     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1911 
1912   SDValue Glue;
1913 
1914   // Build a sequence of copy-to-reg nodes, chained and glued together.
1915   for (auto &Reg : RegsToPass) {
1916     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
1917     Glue = Chain.getValue(1);
1918   }
1919 
1920   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
1921   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
1922   // split it and then direct call can be matched by PseudoCALL.
1923   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
1924     const GlobalValue *GV = S->getGlobal();
1925 
1926     unsigned OpFlags = RISCVII::MO_CALL;
1927     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
1928       OpFlags = RISCVII::MO_PLT;
1929 
1930     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
1931   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1932     unsigned OpFlags = RISCVII::MO_CALL;
1933 
1934     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
1935                                                  nullptr))
1936       OpFlags = RISCVII::MO_PLT;
1937 
1938     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
1939   }
1940 
1941   // The first call operand is the chain and the second is the target address.
1942   SmallVector<SDValue, 8> Ops;
1943   Ops.push_back(Chain);
1944   Ops.push_back(Callee);
1945 
1946   // Add argument registers to the end of the list so that they are
1947   // known live into the call.
1948   for (auto &Reg : RegsToPass)
1949     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1950 
1951   if (!IsTailCall) {
1952     // Add a register mask operand representing the call-preserved registers.
1953     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1954     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1955     assert(Mask && "Missing call preserved mask for calling convention");
1956     Ops.push_back(DAG.getRegisterMask(Mask));
1957   }
1958 
1959   // Glue the call to the argument copies, if any.
1960   if (Glue.getNode())
1961     Ops.push_back(Glue);
1962 
1963   // Emit the call.
1964   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1965 
1966   if (IsTailCall) {
1967     MF.getFrameInfo().setHasTailCall();
1968     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
1969   }
1970 
1971   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
1972   Glue = Chain.getValue(1);
1973 
1974   // Mark the end of the call, which is glued to the call itself.
1975   Chain = DAG.getCALLSEQ_END(Chain,
1976                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1977                              DAG.getConstant(0, DL, PtrVT, true),
1978                              Glue, DL);
1979   Glue = Chain.getValue(1);
1980 
1981   // Assign locations to each value returned by this call.
1982   SmallVector<CCValAssign, 16> RVLocs;
1983   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1984   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
1985 
1986   // Copy all of the result registers out of their specified physreg.
1987   for (auto &VA : RVLocs) {
1988     // Copy the value out
1989     SDValue RetValue =
1990         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1991     // Glue the RetValue to the end of the call sequence
1992     Chain = RetValue.getValue(1);
1993     Glue = RetValue.getValue(2);
1994 
1995     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1996       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
1997       SDValue RetValue2 =
1998           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
1999       Chain = RetValue2.getValue(1);
2000       Glue = RetValue2.getValue(2);
2001       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
2002                              RetValue2);
2003     }
2004 
2005     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2006 
2007     InVals.push_back(RetValue);
2008   }
2009 
2010   return Chain;
2011 }
2012 
2013 bool RISCVTargetLowering::CanLowerReturn(
2014     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2015     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2016   SmallVector<CCValAssign, 16> RVLocs;
2017   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2018   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2019     MVT VT = Outs[i].VT;
2020     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2021     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2022     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
2023                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
2024       return false;
2025   }
2026   return true;
2027 }
2028 
2029 SDValue
2030 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2031                                  bool IsVarArg,
2032                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2033                                  const SmallVectorImpl<SDValue> &OutVals,
2034                                  const SDLoc &DL, SelectionDAG &DAG) const {
2035   // Stores the assignment of the return value to a location.
2036   SmallVector<CCValAssign, 16> RVLocs;
2037 
2038   // Info about the registers and stack slot.
2039   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2040                  *DAG.getContext());
2041 
2042   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2043                     nullptr);
2044 
2045   SDValue Glue;
2046   SmallVector<SDValue, 4> RetOps(1, Chain);
2047 
2048   // Copy the result values into the output registers.
2049   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2050     SDValue Val = OutVals[i];
2051     CCValAssign &VA = RVLocs[i];
2052     assert(VA.isRegLoc() && "Can only return in registers!");
2053 
2054     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2055       // Handle returning f64 on RV32D with a soft float ABI.
2056       assert(VA.isRegLoc() && "Expected return via registers");
2057       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
2058                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
2059       SDValue Lo = SplitF64.getValue(0);
2060       SDValue Hi = SplitF64.getValue(1);
2061       unsigned RegLo = VA.getLocReg();
2062       unsigned RegHi = RegLo + 1;
2063       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
2064       Glue = Chain.getValue(1);
2065       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
2066       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
2067       Glue = Chain.getValue(1);
2068       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
2069     } else {
2070       // Handle a 'normal' return.
2071       Val = convertValVTToLocVT(DAG, Val, VA, DL);
2072       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2073 
2074       // Guarantee that all emitted copies are stuck together.
2075       Glue = Chain.getValue(1);
2076       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2077     }
2078   }
2079 
2080   RetOps[0] = Chain; // Update chain.
2081 
2082   // Add the glue node if we have it.
2083   if (Glue.getNode()) {
2084     RetOps.push_back(Glue);
2085   }
2086 
2087   // Interrupt service routines use different return instructions.
2088   const Function &Func = DAG.getMachineFunction().getFunction();
2089   if (Func.hasFnAttribute("interrupt")) {
2090     if (!Func.getReturnType()->isVoidTy())
2091       report_fatal_error(
2092           "Functions with the interrupt attribute must have void return type!");
2093 
2094     MachineFunction &MF = DAG.getMachineFunction();
2095     StringRef Kind =
2096       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2097 
2098     unsigned RetOpc;
2099     if (Kind == "user")
2100       RetOpc = RISCVISD::URET_FLAG;
2101     else if (Kind == "supervisor")
2102       RetOpc = RISCVISD::SRET_FLAG;
2103     else
2104       RetOpc = RISCVISD::MRET_FLAG;
2105 
2106     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
2107   }
2108 
2109   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
2110 }
2111 
2112 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
2113   switch ((RISCVISD::NodeType)Opcode) {
2114   case RISCVISD::FIRST_NUMBER:
2115     break;
2116   case RISCVISD::RET_FLAG:
2117     return "RISCVISD::RET_FLAG";
2118   case RISCVISD::URET_FLAG:
2119     return "RISCVISD::URET_FLAG";
2120   case RISCVISD::SRET_FLAG:
2121     return "RISCVISD::SRET_FLAG";
2122   case RISCVISD::MRET_FLAG:
2123     return "RISCVISD::MRET_FLAG";
2124   case RISCVISD::CALL:
2125     return "RISCVISD::CALL";
2126   case RISCVISD::SELECT_CC:
2127     return "RISCVISD::SELECT_CC";
2128   case RISCVISD::BuildPairF64:
2129     return "RISCVISD::BuildPairF64";
2130   case RISCVISD::SplitF64:
2131     return "RISCVISD::SplitF64";
2132   case RISCVISD::TAIL:
2133     return "RISCVISD::TAIL";
2134   case RISCVISD::SLLW:
2135     return "RISCVISD::SLLW";
2136   case RISCVISD::SRAW:
2137     return "RISCVISD::SRAW";
2138   case RISCVISD::SRLW:
2139     return "RISCVISD::SRLW";
2140   case RISCVISD::DIVW:
2141     return "RISCVISD::DIVW";
2142   case RISCVISD::DIVUW:
2143     return "RISCVISD::DIVUW";
2144   case RISCVISD::REMUW:
2145     return "RISCVISD::REMUW";
2146   case RISCVISD::FMV_W_X_RV64:
2147     return "RISCVISD::FMV_W_X_RV64";
2148   case RISCVISD::FMV_X_ANYEXTW_RV64:
2149     return "RISCVISD::FMV_X_ANYEXTW_RV64";
2150   }
2151   return nullptr;
2152 }
2153 
2154 std::pair<unsigned, const TargetRegisterClass *>
2155 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2156                                                   StringRef Constraint,
2157                                                   MVT VT) const {
2158   // First, see if this is a constraint that directly corresponds to a
2159   // RISCV register class.
2160   if (Constraint.size() == 1) {
2161     switch (Constraint[0]) {
2162     case 'r':
2163       return std::make_pair(0U, &RISCV::GPRRegClass);
2164     default:
2165       break;
2166     }
2167   }
2168 
2169   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2170 }
2171 
2172 void RISCVTargetLowering::LowerAsmOperandForConstraint(
2173     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2174     SelectionDAG &DAG) const {
2175   // Currently only support length 1 constraints.
2176   if (Constraint.length() == 1) {
2177     switch (Constraint[0]) {
2178     case 'I':
2179       // Validate & create a 12-bit signed immediate operand.
2180       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
2181         uint64_t CVal = C->getSExtValue();
2182         if (isInt<12>(CVal))
2183           Ops.push_back(
2184               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
2185       }
2186       return;
2187     case 'J':
2188       // Validate & create an integer zero operand.
2189       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2190         if (C->getZExtValue() == 0)
2191           Ops.push_back(
2192               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
2193       return;
2194     case 'K':
2195       // Validate & create a 5-bit unsigned immediate operand.
2196       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
2197         uint64_t CVal = C->getZExtValue();
2198         if (isUInt<5>(CVal))
2199           Ops.push_back(
2200               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
2201       }
2202       return;
2203     default:
2204       break;
2205     }
2206   }
2207   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2208 }
2209 
2210 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
2211                                                    Instruction *Inst,
2212                                                    AtomicOrdering Ord) const {
2213   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
2214     return Builder.CreateFence(Ord);
2215   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
2216     return Builder.CreateFence(AtomicOrdering::Release);
2217   return nullptr;
2218 }
2219 
2220 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
2221                                                     Instruction *Inst,
2222                                                     AtomicOrdering Ord) const {
2223   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
2224     return Builder.CreateFence(AtomicOrdering::Acquire);
2225   return nullptr;
2226 }
2227 
2228 TargetLowering::AtomicExpansionKind
2229 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2230   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
2231   // point operations can't be used in an lr/sc sequence without breaking the
2232   // forward-progress guarantee.
2233   if (AI->isFloatingPointOperation())
2234     return AtomicExpansionKind::CmpXChg;
2235 
2236   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2237   if (Size == 8 || Size == 16)
2238     return AtomicExpansionKind::MaskedIntrinsic;
2239   return AtomicExpansionKind::None;
2240 }
2241 
2242 static Intrinsic::ID
2243 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
2244   if (XLen == 32) {
2245     switch (BinOp) {
2246     default:
2247       llvm_unreachable("Unexpected AtomicRMW BinOp");
2248     case AtomicRMWInst::Xchg:
2249       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
2250     case AtomicRMWInst::Add:
2251       return Intrinsic::riscv_masked_atomicrmw_add_i32;
2252     case AtomicRMWInst::Sub:
2253       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
2254     case AtomicRMWInst::Nand:
2255       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
2256     case AtomicRMWInst::Max:
2257       return Intrinsic::riscv_masked_atomicrmw_max_i32;
2258     case AtomicRMWInst::Min:
2259       return Intrinsic::riscv_masked_atomicrmw_min_i32;
2260     case AtomicRMWInst::UMax:
2261       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
2262     case AtomicRMWInst::UMin:
2263       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
2264     }
2265   }
2266 
2267   if (XLen == 64) {
2268     switch (BinOp) {
2269     default:
2270       llvm_unreachable("Unexpected AtomicRMW BinOp");
2271     case AtomicRMWInst::Xchg:
2272       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
2273     case AtomicRMWInst::Add:
2274       return Intrinsic::riscv_masked_atomicrmw_add_i64;
2275     case AtomicRMWInst::Sub:
2276       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
2277     case AtomicRMWInst::Nand:
2278       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
2279     case AtomicRMWInst::Max:
2280       return Intrinsic::riscv_masked_atomicrmw_max_i64;
2281     case AtomicRMWInst::Min:
2282       return Intrinsic::riscv_masked_atomicrmw_min_i64;
2283     case AtomicRMWInst::UMax:
2284       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
2285     case AtomicRMWInst::UMin:
2286       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
2287     }
2288   }
2289 
2290   llvm_unreachable("Unexpected XLen\n");
2291 }
2292 
2293 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
2294     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2295     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2296   unsigned XLen = Subtarget.getXLen();
2297   Value *Ordering =
2298       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
2299   Type *Tys[] = {AlignedAddr->getType()};
2300   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
2301       AI->getModule(),
2302       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
2303 
2304   if (XLen == 64) {
2305     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2306     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2307     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2308   }
2309 
2310   Value *Result;
2311 
2312   // Must pass the shift amount needed to sign extend the loaded value prior
2313   // to performing a signed comparison for min/max. ShiftAmt is the number of
2314   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
2315   // is the number of bits to left+right shift the value in order to
2316   // sign-extend.
2317   if (AI->getOperation() == AtomicRMWInst::Min ||
2318       AI->getOperation() == AtomicRMWInst::Max) {
2319     const DataLayout &DL = AI->getModule()->getDataLayout();
2320     unsigned ValWidth =
2321         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2322     Value *SextShamt =
2323         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
2324     Result = Builder.CreateCall(LrwOpScwLoop,
2325                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2326   } else {
2327     Result =
2328         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2329   }
2330 
2331   if (XLen == 64)
2332     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2333   return Result;
2334 }
2335 
2336 TargetLowering::AtomicExpansionKind
2337 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
2338     AtomicCmpXchgInst *CI) const {
2339   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2340   if (Size == 8 || Size == 16)
2341     return AtomicExpansionKind::MaskedIntrinsic;
2342   return AtomicExpansionKind::None;
2343 }
2344 
2345 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2346     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2347     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2348   unsigned XLen = Subtarget.getXLen();
2349   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
2350   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
2351   if (XLen == 64) {
2352     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2353     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2354     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2355     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
2356   }
2357   Type *Tys[] = {AlignedAddr->getType()};
2358   Function *MaskedCmpXchg =
2359       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2360   Value *Result = Builder.CreateCall(
2361       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2362   if (XLen == 64)
2363     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2364   return Result;
2365 }
2366