1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "Utils/RISCVMatInt.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "riscv-lower"
41 
42 STATISTIC(NumTailCalls, "Number of tail calls");
43 
44 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
45                                          const RISCVSubtarget &STI)
46     : TargetLowering(TM), Subtarget(STI) {
47 
48   if (Subtarget.isRV32E())
49     report_fatal_error("Codegen not yet implemented for RV32E");
50 
51   RISCVABI::ABI ABI = Subtarget.getTargetABI();
52   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
53 
54   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
55       !Subtarget.hasStdExtF()) {
56     errs() << "Hard-float 'f' ABI can't be used for a target that "
57                 "doesn't support the F instruction set extension (ignoring "
58                           "target-abi)\n";
59     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
60   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
61              !Subtarget.hasStdExtD()) {
62     errs() << "Hard-float 'd' ABI can't be used for a target that "
63               "doesn't support the D instruction set extension (ignoring "
64               "target-abi)\n";
65     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
66   }
67 
68   switch (ABI) {
69   default:
70     report_fatal_error("Don't know how to lower this ABI");
71   case RISCVABI::ABI_ILP32:
72   case RISCVABI::ABI_ILP32F:
73   case RISCVABI::ABI_ILP32D:
74   case RISCVABI::ABI_LP64:
75   case RISCVABI::ABI_LP64F:
76   case RISCVABI::ABI_LP64D:
77     break;
78   }
79 
80   MVT XLenVT = Subtarget.getXLenVT();
81 
82   // Set up the register classes.
83   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
84 
85   if (Subtarget.hasStdExtF())
86     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
87   if (Subtarget.hasStdExtD())
88     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
89 
90   // Compute derived properties from the register classes.
91   computeRegisterProperties(STI.getRegisterInfo());
92 
93   setStackPointerRegisterToSaveRestore(RISCV::X2);
94 
95   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
96     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
97 
98   // TODO: add all necessary setOperationAction calls.
99   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
100 
101   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
102   setOperationAction(ISD::BR_CC, XLenVT, Expand);
103   setOperationAction(ISD::SELECT, XLenVT, Custom);
104   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
105 
106   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
107   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
108 
109   setOperationAction(ISD::VASTART, MVT::Other, Custom);
110   setOperationAction(ISD::VAARG, MVT::Other, Expand);
111   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
112   setOperationAction(ISD::VAEND, MVT::Other, Expand);
113 
114   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
115   if (!Subtarget.hasStdExtZbb()) {
116     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
117     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
118   }
119 
120   if (Subtarget.is64Bit()) {
121     setOperationAction(ISD::ADD, MVT::i32, Custom);
122     setOperationAction(ISD::SUB, MVT::i32, Custom);
123     setOperationAction(ISD::SHL, MVT::i32, Custom);
124     setOperationAction(ISD::SRA, MVT::i32, Custom);
125     setOperationAction(ISD::SRL, MVT::i32, Custom);
126   }
127 
128   if (!Subtarget.hasStdExtM()) {
129     setOperationAction(ISD::MUL, XLenVT, Expand);
130     setOperationAction(ISD::MULHS, XLenVT, Expand);
131     setOperationAction(ISD::MULHU, XLenVT, Expand);
132     setOperationAction(ISD::SDIV, XLenVT, Expand);
133     setOperationAction(ISD::UDIV, XLenVT, Expand);
134     setOperationAction(ISD::SREM, XLenVT, Expand);
135     setOperationAction(ISD::UREM, XLenVT, Expand);
136   }
137 
138   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
139     setOperationAction(ISD::MUL, MVT::i32, Custom);
140     setOperationAction(ISD::SDIV, MVT::i32, Custom);
141     setOperationAction(ISD::UDIV, MVT::i32, Custom);
142     setOperationAction(ISD::UREM, MVT::i32, Custom);
143   }
144 
145   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
146   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
147   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
148   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
149 
150   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
151   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
152   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
153 
154   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
155     if (Subtarget.is64Bit()) {
156       setOperationAction(ISD::ROTL, MVT::i32, Custom);
157       setOperationAction(ISD::ROTR, MVT::i32, Custom);
158     }
159   } else {
160     setOperationAction(ISD::ROTL, XLenVT, Expand);
161     setOperationAction(ISD::ROTR, XLenVT, Expand);
162   }
163 
164   if (Subtarget.hasStdExtZbp()) {
165     setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
166 
167     if (Subtarget.is64Bit()) {
168       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
169       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
170     }
171   } else {
172     setOperationAction(ISD::BSWAP, XLenVT, Expand);
173   }
174 
175   if (!Subtarget.hasStdExtZbb()) {
176     setOperationAction(ISD::CTTZ, XLenVT, Expand);
177     setOperationAction(ISD::CTLZ, XLenVT, Expand);
178     setOperationAction(ISD::CTPOP, XLenVT, Expand);
179   }
180 
181   if (Subtarget.hasStdExtZbt()) {
182     setOperationAction(ISD::FSHL, XLenVT, Legal);
183     setOperationAction(ISD::FSHR, XLenVT, Legal);
184   }
185 
186   ISD::CondCode FPCCToExtend[] = {
187       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
188       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
189       ISD::SETGE,  ISD::SETNE};
190 
191   ISD::NodeType FPOpToExtend[] = {
192       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
193       ISD::FP_TO_FP16};
194 
195   if (Subtarget.hasStdExtF()) {
196     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
197     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
198     for (auto CC : FPCCToExtend)
199       setCondCodeAction(CC, MVT::f32, Expand);
200     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
201     setOperationAction(ISD::SELECT, MVT::f32, Custom);
202     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
203     for (auto Op : FPOpToExtend)
204       setOperationAction(Op, MVT::f32, Expand);
205     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
206     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
207   }
208 
209   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
210     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
211 
212   if (Subtarget.hasStdExtD()) {
213     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
214     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
215     for (auto CC : FPCCToExtend)
216       setCondCodeAction(CC, MVT::f64, Expand);
217     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
218     setOperationAction(ISD::SELECT, MVT::f64, Custom);
219     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
220     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
221     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
222     for (auto Op : FPOpToExtend)
223       setOperationAction(Op, MVT::f64, Expand);
224     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
225     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
226   }
227 
228   if (Subtarget.is64Bit()) {
229     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
230     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
231     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
232     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
233   }
234 
235   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
236   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
237   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
238 
239   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
240 
241   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
242   // Unfortunately this can't be determined just from the ISA naming string.
243   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
244                      Subtarget.is64Bit() ? Legal : Custom);
245 
246   setOperationAction(ISD::TRAP, MVT::Other, Legal);
247   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
248   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
249 
250   if (Subtarget.hasStdExtA()) {
251     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
252     setMinCmpXchgSizeInBits(32);
253   } else {
254     setMaxAtomicSizeInBitsSupported(0);
255   }
256 
257   setBooleanContents(ZeroOrOneBooleanContent);
258 
259   // Function alignments.
260   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
261   setMinFunctionAlignment(FunctionAlignment);
262   setPrefFunctionAlignment(FunctionAlignment);
263 
264   // Effectively disable jump table generation.
265   setMinimumJumpTableEntries(INT_MAX);
266 
267   // Jumps are expensive, compared to logic
268   setJumpIsExpensive();
269 
270   // We can use any register for comparisons
271   setHasMultipleConditionRegisters();
272 
273   if (Subtarget.hasStdExtZbp()) {
274     setTargetDAGCombine(ISD::OR);
275   }
276 }
277 
278 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
279                                             EVT VT) const {
280   if (!VT.isVector())
281     return getPointerTy(DL);
282   return VT.changeVectorElementTypeToInteger();
283 }
284 
285 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
286                                              const CallInst &I,
287                                              MachineFunction &MF,
288                                              unsigned Intrinsic) const {
289   switch (Intrinsic) {
290   default:
291     return false;
292   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
293   case Intrinsic::riscv_masked_atomicrmw_add_i32:
294   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
295   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
296   case Intrinsic::riscv_masked_atomicrmw_max_i32:
297   case Intrinsic::riscv_masked_atomicrmw_min_i32:
298   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
299   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
300   case Intrinsic::riscv_masked_cmpxchg_i32:
301     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
302     Info.opc = ISD::INTRINSIC_W_CHAIN;
303     Info.memVT = MVT::getVT(PtrTy->getElementType());
304     Info.ptrVal = I.getArgOperand(0);
305     Info.offset = 0;
306     Info.align = Align(4);
307     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
308                  MachineMemOperand::MOVolatile;
309     return true;
310   }
311 }
312 
313 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
314                                                 const AddrMode &AM, Type *Ty,
315                                                 unsigned AS,
316                                                 Instruction *I) const {
317   // No global is ever allowed as a base.
318   if (AM.BaseGV)
319     return false;
320 
321   // Require a 12-bit signed offset.
322   if (!isInt<12>(AM.BaseOffs))
323     return false;
324 
325   switch (AM.Scale) {
326   case 0: // "r+i" or just "i", depending on HasBaseReg.
327     break;
328   case 1:
329     if (!AM.HasBaseReg) // allow "r+i".
330       break;
331     return false; // disallow "r+r" or "r+r+i".
332   default:
333     return false;
334   }
335 
336   return true;
337 }
338 
339 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
340   return isInt<12>(Imm);
341 }
342 
343 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
344   return isInt<12>(Imm);
345 }
346 
347 // On RV32, 64-bit integers are split into their high and low parts and held
348 // in two different registers, so the trunc is free since the low register can
349 // just be used.
350 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
351   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
352     return false;
353   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
354   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
355   return (SrcBits == 64 && DestBits == 32);
356 }
357 
358 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
359   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
360       !SrcVT.isInteger() || !DstVT.isInteger())
361     return false;
362   unsigned SrcBits = SrcVT.getSizeInBits();
363   unsigned DestBits = DstVT.getSizeInBits();
364   return (SrcBits == 64 && DestBits == 32);
365 }
366 
367 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
368   // Zexts are free if they can be combined with a load.
369   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
370     EVT MemVT = LD->getMemoryVT();
371     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
372          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
373         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
374          LD->getExtensionType() == ISD::ZEXTLOAD))
375       return true;
376   }
377 
378   return TargetLowering::isZExtFree(Val, VT2);
379 }
380 
381 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
382   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
383 }
384 
385 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
386   return Subtarget.hasStdExtZbb();
387 }
388 
389 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
390   return Subtarget.hasStdExtZbb();
391 }
392 
393 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
394                                        bool ForCodeSize) const {
395   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
396     return false;
397   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
398     return false;
399   if (Imm.isNegZero())
400     return false;
401   return Imm.isZero();
402 }
403 
404 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
405   return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
406          (VT == MVT::f64 && Subtarget.hasStdExtD());
407 }
408 
409 // Changes the condition code and swaps operands if necessary, so the SetCC
410 // operation matches one of the comparisons supported directly in the RISC-V
411 // ISA.
412 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
413   switch (CC) {
414   default:
415     break;
416   case ISD::SETGT:
417   case ISD::SETLE:
418   case ISD::SETUGT:
419   case ISD::SETULE:
420     CC = ISD::getSetCCSwappedOperands(CC);
421     std::swap(LHS, RHS);
422     break;
423   }
424 }
425 
426 // Return the RISC-V branch opcode that matches the given DAG integer
427 // condition code. The CondCode must be one of those supported by the RISC-V
428 // ISA (see normaliseSetCC).
429 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
430   switch (CC) {
431   default:
432     llvm_unreachable("Unsupported CondCode");
433   case ISD::SETEQ:
434     return RISCV::BEQ;
435   case ISD::SETNE:
436     return RISCV::BNE;
437   case ISD::SETLT:
438     return RISCV::BLT;
439   case ISD::SETGE:
440     return RISCV::BGE;
441   case ISD::SETULT:
442     return RISCV::BLTU;
443   case ISD::SETUGE:
444     return RISCV::BGEU;
445   }
446 }
447 
448 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
449                                             SelectionDAG &DAG) const {
450   switch (Op.getOpcode()) {
451   default:
452     report_fatal_error("unimplemented operand");
453   case ISD::GlobalAddress:
454     return lowerGlobalAddress(Op, DAG);
455   case ISD::BlockAddress:
456     return lowerBlockAddress(Op, DAG);
457   case ISD::ConstantPool:
458     return lowerConstantPool(Op, DAG);
459   case ISD::GlobalTLSAddress:
460     return lowerGlobalTLSAddress(Op, DAG);
461   case ISD::SELECT:
462     return lowerSELECT(Op, DAG);
463   case ISD::VASTART:
464     return lowerVASTART(Op, DAG);
465   case ISD::FRAMEADDR:
466     return lowerFRAMEADDR(Op, DAG);
467   case ISD::RETURNADDR:
468     return lowerRETURNADDR(Op, DAG);
469   case ISD::SHL_PARTS:
470     return lowerShiftLeftParts(Op, DAG);
471   case ISD::SRA_PARTS:
472     return lowerShiftRightParts(Op, DAG, true);
473   case ISD::SRL_PARTS:
474     return lowerShiftRightParts(Op, DAG, false);
475   case ISD::BITCAST: {
476     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
477            "Unexpected custom legalisation");
478     SDLoc DL(Op);
479     SDValue Op0 = Op.getOperand(0);
480     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
481       return SDValue();
482     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
483     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
484     return FPConv;
485   }
486   case ISD::INTRINSIC_WO_CHAIN:
487     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
488   }
489 }
490 
491 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
492                              SelectionDAG &DAG, unsigned Flags) {
493   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
494 }
495 
496 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
497                              SelectionDAG &DAG, unsigned Flags) {
498   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
499                                    Flags);
500 }
501 
502 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
503                              SelectionDAG &DAG, unsigned Flags) {
504   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
505                                    N->getOffset(), Flags);
506 }
507 
508 template <class NodeTy>
509 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
510                                      bool IsLocal) const {
511   SDLoc DL(N);
512   EVT Ty = getPointerTy(DAG.getDataLayout());
513 
514   if (isPositionIndependent()) {
515     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
516     if (IsLocal)
517       // Use PC-relative addressing to access the symbol. This generates the
518       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
519       // %pcrel_lo(auipc)).
520       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
521 
522     // Use PC-relative addressing to access the GOT for this symbol, then load
523     // the address from the GOT. This generates the pattern (PseudoLA sym),
524     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
525     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
526   }
527 
528   switch (getTargetMachine().getCodeModel()) {
529   default:
530     report_fatal_error("Unsupported code model for lowering");
531   case CodeModel::Small: {
532     // Generate a sequence for accessing addresses within the first 2 GiB of
533     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
534     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
535     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
536     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
537     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
538   }
539   case CodeModel::Medium: {
540     // Generate a sequence for accessing addresses within any 2GiB range within
541     // the address space. This generates the pattern (PseudoLLA sym), which
542     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
543     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
544     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
545   }
546   }
547 }
548 
549 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
550                                                 SelectionDAG &DAG) const {
551   SDLoc DL(Op);
552   EVT Ty = Op.getValueType();
553   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
554   int64_t Offset = N->getOffset();
555   MVT XLenVT = Subtarget.getXLenVT();
556 
557   const GlobalValue *GV = N->getGlobal();
558   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
559   SDValue Addr = getAddr(N, DAG, IsLocal);
560 
561   // In order to maximise the opportunity for common subexpression elimination,
562   // emit a separate ADD node for the global address offset instead of folding
563   // it in the global address node. Later peephole optimisations may choose to
564   // fold it back in when profitable.
565   if (Offset != 0)
566     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
567                        DAG.getConstant(Offset, DL, XLenVT));
568   return Addr;
569 }
570 
571 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
572                                                SelectionDAG &DAG) const {
573   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
574 
575   return getAddr(N, DAG);
576 }
577 
578 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
579                                                SelectionDAG &DAG) const {
580   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
581 
582   return getAddr(N, DAG);
583 }
584 
585 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
586                                               SelectionDAG &DAG,
587                                               bool UseGOT) const {
588   SDLoc DL(N);
589   EVT Ty = getPointerTy(DAG.getDataLayout());
590   const GlobalValue *GV = N->getGlobal();
591   MVT XLenVT = Subtarget.getXLenVT();
592 
593   if (UseGOT) {
594     // Use PC-relative addressing to access the GOT for this TLS symbol, then
595     // load the address from the GOT and add the thread pointer. This generates
596     // the pattern (PseudoLA_TLS_IE sym), which expands to
597     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
598     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
599     SDValue Load =
600         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
601 
602     // Add the thread pointer.
603     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
604     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
605   }
606 
607   // Generate a sequence for accessing the address relative to the thread
608   // pointer, with the appropriate adjustment for the thread pointer offset.
609   // This generates the pattern
610   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
611   SDValue AddrHi =
612       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
613   SDValue AddrAdd =
614       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
615   SDValue AddrLo =
616       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
617 
618   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
619   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
620   SDValue MNAdd = SDValue(
621       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
622       0);
623   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
624 }
625 
626 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
627                                                SelectionDAG &DAG) const {
628   SDLoc DL(N);
629   EVT Ty = getPointerTy(DAG.getDataLayout());
630   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
631   const GlobalValue *GV = N->getGlobal();
632 
633   // Use a PC-relative addressing mode to access the global dynamic GOT address.
634   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
635   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
636   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
637   SDValue Load =
638       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
639 
640   // Prepare argument list to generate call.
641   ArgListTy Args;
642   ArgListEntry Entry;
643   Entry.Node = Load;
644   Entry.Ty = CallTy;
645   Args.push_back(Entry);
646 
647   // Setup call to __tls_get_addr.
648   TargetLowering::CallLoweringInfo CLI(DAG);
649   CLI.setDebugLoc(DL)
650       .setChain(DAG.getEntryNode())
651       .setLibCallee(CallingConv::C, CallTy,
652                     DAG.getExternalSymbol("__tls_get_addr", Ty),
653                     std::move(Args));
654 
655   return LowerCallTo(CLI).first;
656 }
657 
658 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
659                                                    SelectionDAG &DAG) const {
660   SDLoc DL(Op);
661   EVT Ty = Op.getValueType();
662   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
663   int64_t Offset = N->getOffset();
664   MVT XLenVT = Subtarget.getXLenVT();
665 
666   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
667 
668   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
669       CallingConv::GHC)
670     report_fatal_error("In GHC calling convention TLS is not supported");
671 
672   SDValue Addr;
673   switch (Model) {
674   case TLSModel::LocalExec:
675     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
676     break;
677   case TLSModel::InitialExec:
678     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
679     break;
680   case TLSModel::LocalDynamic:
681   case TLSModel::GeneralDynamic:
682     Addr = getDynamicTLSAddr(N, DAG);
683     break;
684   }
685 
686   // In order to maximise the opportunity for common subexpression elimination,
687   // emit a separate ADD node for the global address offset instead of folding
688   // it in the global address node. Later peephole optimisations may choose to
689   // fold it back in when profitable.
690   if (Offset != 0)
691     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
692                        DAG.getConstant(Offset, DL, XLenVT));
693   return Addr;
694 }
695 
696 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
697   SDValue CondV = Op.getOperand(0);
698   SDValue TrueV = Op.getOperand(1);
699   SDValue FalseV = Op.getOperand(2);
700   SDLoc DL(Op);
701   MVT XLenVT = Subtarget.getXLenVT();
702 
703   // If the result type is XLenVT and CondV is the output of a SETCC node
704   // which also operated on XLenVT inputs, then merge the SETCC node into the
705   // lowered RISCVISD::SELECT_CC to take advantage of the integer
706   // compare+branch instructions. i.e.:
707   // (select (setcc lhs, rhs, cc), truev, falsev)
708   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
709   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
710       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
711     SDValue LHS = CondV.getOperand(0);
712     SDValue RHS = CondV.getOperand(1);
713     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
714     ISD::CondCode CCVal = CC->get();
715 
716     normaliseSetCC(LHS, RHS, CCVal);
717 
718     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
719     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
720     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
721   }
722 
723   // Otherwise:
724   // (select condv, truev, falsev)
725   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
726   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
727   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
728 
729   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
730 
731   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
732 }
733 
734 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
735   MachineFunction &MF = DAG.getMachineFunction();
736   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
737 
738   SDLoc DL(Op);
739   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
740                                  getPointerTy(MF.getDataLayout()));
741 
742   // vastart just stores the address of the VarArgsFrameIndex slot into the
743   // memory location argument.
744   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
745   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
746                       MachinePointerInfo(SV));
747 }
748 
749 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
750                                             SelectionDAG &DAG) const {
751   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
752   MachineFunction &MF = DAG.getMachineFunction();
753   MachineFrameInfo &MFI = MF.getFrameInfo();
754   MFI.setFrameAddressIsTaken(true);
755   Register FrameReg = RI.getFrameRegister(MF);
756   int XLenInBytes = Subtarget.getXLen() / 8;
757 
758   EVT VT = Op.getValueType();
759   SDLoc DL(Op);
760   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
761   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
762   while (Depth--) {
763     int Offset = -(XLenInBytes * 2);
764     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
765                               DAG.getIntPtrConstant(Offset, DL));
766     FrameAddr =
767         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
768   }
769   return FrameAddr;
770 }
771 
772 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
773                                              SelectionDAG &DAG) const {
774   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
775   MachineFunction &MF = DAG.getMachineFunction();
776   MachineFrameInfo &MFI = MF.getFrameInfo();
777   MFI.setReturnAddressIsTaken(true);
778   MVT XLenVT = Subtarget.getXLenVT();
779   int XLenInBytes = Subtarget.getXLen() / 8;
780 
781   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
782     return SDValue();
783 
784   EVT VT = Op.getValueType();
785   SDLoc DL(Op);
786   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
787   if (Depth) {
788     int Off = -XLenInBytes;
789     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
790     SDValue Offset = DAG.getConstant(Off, DL, VT);
791     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
792                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
793                        MachinePointerInfo());
794   }
795 
796   // Return the value of the return address register, marking it an implicit
797   // live-in.
798   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
799   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
800 }
801 
802 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
803                                                  SelectionDAG &DAG) const {
804   SDLoc DL(Op);
805   SDValue Lo = Op.getOperand(0);
806   SDValue Hi = Op.getOperand(1);
807   SDValue Shamt = Op.getOperand(2);
808   EVT VT = Lo.getValueType();
809 
810   // if Shamt-XLEN < 0: // Shamt < XLEN
811   //   Lo = Lo << Shamt
812   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
813   // else:
814   //   Lo = 0
815   //   Hi = Lo << (Shamt-XLEN)
816 
817   SDValue Zero = DAG.getConstant(0, DL, VT);
818   SDValue One = DAG.getConstant(1, DL, VT);
819   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
820   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
821   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
822   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
823 
824   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
825   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
826   SDValue ShiftRightLo =
827       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
828   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
829   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
830   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
831 
832   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
833 
834   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
835   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
836 
837   SDValue Parts[2] = {Lo, Hi};
838   return DAG.getMergeValues(Parts, DL);
839 }
840 
841 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
842                                                   bool IsSRA) const {
843   SDLoc DL(Op);
844   SDValue Lo = Op.getOperand(0);
845   SDValue Hi = Op.getOperand(1);
846   SDValue Shamt = Op.getOperand(2);
847   EVT VT = Lo.getValueType();
848 
849   // SRA expansion:
850   //   if Shamt-XLEN < 0: // Shamt < XLEN
851   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
852   //     Hi = Hi >>s Shamt
853   //   else:
854   //     Lo = Hi >>s (Shamt-XLEN);
855   //     Hi = Hi >>s (XLEN-1)
856   //
857   // SRL expansion:
858   //   if Shamt-XLEN < 0: // Shamt < XLEN
859   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
860   //     Hi = Hi >>u Shamt
861   //   else:
862   //     Lo = Hi >>u (Shamt-XLEN);
863   //     Hi = 0;
864 
865   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
866 
867   SDValue Zero = DAG.getConstant(0, DL, VT);
868   SDValue One = DAG.getConstant(1, DL, VT);
869   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
870   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
871   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
872   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
873 
874   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
875   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
876   SDValue ShiftLeftHi =
877       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
878   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
879   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
880   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
881   SDValue HiFalse =
882       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
883 
884   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
885 
886   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
887   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
888 
889   SDValue Parts[2] = {Lo, Hi};
890   return DAG.getMergeValues(Parts, DL);
891 }
892 
893 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
894                                                      SelectionDAG &DAG) const {
895   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
896   SDLoc DL(Op);
897   switch (IntNo) {
898   default:
899     return SDValue();    // Don't custom lower most intrinsics.
900   case Intrinsic::thread_pointer: {
901     EVT PtrVT = getPointerTy(DAG.getDataLayout());
902     return DAG.getRegister(RISCV::X4, PtrVT);
903   }
904   }
905 }
906 
907 // Returns the opcode of the target-specific SDNode that implements the 32-bit
908 // form of the given Opcode.
909 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
910   switch (Opcode) {
911   default:
912     llvm_unreachable("Unexpected opcode");
913   case ISD::SHL:
914     return RISCVISD::SLLW;
915   case ISD::SRA:
916     return RISCVISD::SRAW;
917   case ISD::SRL:
918     return RISCVISD::SRLW;
919   case ISD::SDIV:
920     return RISCVISD::DIVW;
921   case ISD::UDIV:
922     return RISCVISD::DIVUW;
923   case ISD::UREM:
924     return RISCVISD::REMUW;
925   case ISD::ROTL:
926     return RISCVISD::ROLW;
927   case ISD::ROTR:
928     return RISCVISD::RORW;
929   case RISCVISD::GREVI:
930     return RISCVISD::GREVIW;
931   case RISCVISD::GORCI:
932     return RISCVISD::GORCIW;
933   }
934 }
935 
936 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
937 // Because i32 isn't a legal type for RV64, these operations would otherwise
938 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
939 // later one because the fact the operation was originally of type i32 is
940 // lost.
941 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
942   SDLoc DL(N);
943   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
944   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
945   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
946   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
947   // ReplaceNodeResults requires we maintain the same type for the return value.
948   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
949 }
950 
951 // Converts the given 32-bit operation to a i64 operation with signed extension
952 // semantic to reduce the signed extension instructions.
953 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
954   SDLoc DL(N);
955   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
956   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
957   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
958   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
959                                DAG.getValueType(MVT::i32));
960   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
961 }
962 
963 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
964                                              SmallVectorImpl<SDValue> &Results,
965                                              SelectionDAG &DAG) const {
966   SDLoc DL(N);
967   switch (N->getOpcode()) {
968   default:
969     llvm_unreachable("Don't know how to custom type legalize this operation!");
970   case ISD::STRICT_FP_TO_SINT:
971   case ISD::STRICT_FP_TO_UINT:
972   case ISD::FP_TO_SINT:
973   case ISD::FP_TO_UINT: {
974     bool IsStrict = N->isStrictFPOpcode();
975     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
976            "Unexpected custom legalisation");
977     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
978     // If the FP type needs to be softened, emit a library call using the 'si'
979     // version. If we left it to default legalization we'd end up with 'di'. If
980     // the FP type doesn't need to be softened just let generic type
981     // legalization promote the result type.
982     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
983         TargetLowering::TypeSoftenFloat)
984       return;
985     RTLIB::Libcall LC;
986     if (N->getOpcode() == ISD::FP_TO_SINT ||
987         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
988       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
989     else
990       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
991     MakeLibCallOptions CallOptions;
992     EVT OpVT = Op0.getValueType();
993     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
994     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
995     SDValue Result;
996     std::tie(Result, Chain) =
997         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
998     Results.push_back(Result);
999     if (IsStrict)
1000       Results.push_back(Chain);
1001     break;
1002   }
1003   case ISD::READCYCLECOUNTER: {
1004     assert(!Subtarget.is64Bit() &&
1005            "READCYCLECOUNTER only has custom type legalization on riscv32");
1006 
1007     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
1008     SDValue RCW =
1009         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
1010 
1011     Results.push_back(
1012         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
1013     Results.push_back(RCW.getValue(2));
1014     break;
1015   }
1016   case ISD::ADD:
1017   case ISD::SUB:
1018   case ISD::MUL:
1019     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1020            "Unexpected custom legalisation");
1021     if (N->getOperand(1).getOpcode() == ISD::Constant)
1022       return;
1023     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1024     break;
1025   case ISD::SHL:
1026   case ISD::SRA:
1027   case ISD::SRL:
1028     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1029            "Unexpected custom legalisation");
1030     if (N->getOperand(1).getOpcode() == ISD::Constant)
1031       return;
1032     Results.push_back(customLegalizeToWOp(N, DAG));
1033     break;
1034   case ISD::ROTL:
1035   case ISD::ROTR:
1036     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1037            "Unexpected custom legalisation");
1038     Results.push_back(customLegalizeToWOp(N, DAG));
1039     break;
1040   case ISD::SDIV:
1041   case ISD::UDIV:
1042   case ISD::UREM:
1043     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1044            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
1045     if (N->getOperand(0).getOpcode() == ISD::Constant ||
1046         N->getOperand(1).getOpcode() == ISD::Constant)
1047       return;
1048     Results.push_back(customLegalizeToWOp(N, DAG));
1049     break;
1050   case ISD::BITCAST: {
1051     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1052            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
1053     SDValue Op0 = N->getOperand(0);
1054     if (Op0.getValueType() != MVT::f32)
1055       return;
1056     SDValue FPConv =
1057         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
1058     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
1059     break;
1060   }
1061   case RISCVISD::GREVI:
1062   case RISCVISD::GORCI: {
1063     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1064            "Unexpected custom legalisation");
1065     // This is similar to customLegalizeToWOp, except that we pass the second
1066     // operand (a TargetConstant) straight through: it is already of type
1067     // XLenVT.
1068     SDLoc DL(N);
1069     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1070     SDValue NewOp0 =
1071         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1072     SDValue NewRes =
1073         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
1074     // ReplaceNodeResults requires we maintain the same type for the return
1075     // value.
1076     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
1077     break;
1078   }
1079   case ISD::BSWAP:
1080   case ISD::BITREVERSE: {
1081     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1082            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1083     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1084                                  N->getOperand(0));
1085     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
1086     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
1087                                  DAG.getTargetConstant(Imm, DL,
1088                                                        Subtarget.getXLenVT()));
1089     // ReplaceNodeResults requires we maintain the same type for the return
1090     // value.
1091     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
1092     break;
1093   }
1094   }
1095 }
1096 
1097 // A structure to hold one of the bit-manipulation patterns below. Together, a
1098 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1099 //   (or (and (shl x, 1), 0xAAAAAAAA),
1100 //       (and (srl x, 1), 0x55555555))
1101 struct RISCVBitmanipPat {
1102   SDValue Op;
1103   unsigned ShAmt;
1104   bool IsSHL;
1105 
1106   bool formsPairWith(const RISCVBitmanipPat &Other) const {
1107     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1108   }
1109 };
1110 
1111 // Matches any of the following bit-manipulation patterns:
1112 //   (and (shl x, 1), (0x55555555 << 1))
1113 //   (and (srl x, 1), 0x55555555)
1114 //   (shl (and x, 0x55555555), 1)
1115 //   (srl (and x, (0x55555555 << 1)), 1)
1116 // where the shift amount and mask may vary thus:
1117 //   [1]  = 0x55555555 / 0xAAAAAAAA
1118 //   [2]  = 0x33333333 / 0xCCCCCCCC
1119 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
1120 //   [8]  = 0x00FF00FF / 0xFF00FF00
1121 //   [16] = 0x0000FFFF / 0xFFFFFFFF
1122 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
1123 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
1124   Optional<uint64_t> Mask;
1125   // Optionally consume a mask around the shift operation.
1126   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
1127     Mask = Op.getConstantOperandVal(1);
1128     Op = Op.getOperand(0);
1129   }
1130   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
1131     return None;
1132   bool IsSHL = Op.getOpcode() == ISD::SHL;
1133 
1134   if (!isa<ConstantSDNode>(Op.getOperand(1)))
1135     return None;
1136   auto ShAmt = Op.getConstantOperandVal(1);
1137 
1138   if (!isPowerOf2_64(ShAmt))
1139     return None;
1140 
1141   // These are the unshifted masks which we use to match bit-manipulation
1142   // patterns. They may be shifted left in certain circumstances.
1143   static const uint64_t BitmanipMasks[] = {
1144       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
1145       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
1146   };
1147 
1148   unsigned MaskIdx = Log2_64(ShAmt);
1149   if (MaskIdx >= array_lengthof(BitmanipMasks))
1150     return None;
1151 
1152   auto Src = Op.getOperand(0);
1153 
1154   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
1155   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
1156 
1157   // The expected mask is shifted left when the AND is found around SHL
1158   // patterns.
1159   //   ((x >> 1) & 0x55555555)
1160   //   ((x << 1) & 0xAAAAAAAA)
1161   bool SHLExpMask = IsSHL;
1162 
1163   if (!Mask) {
1164     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
1165     // the mask is all ones: consume that now.
1166     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
1167       Mask = Src.getConstantOperandVal(1);
1168       Src = Src.getOperand(0);
1169       // The expected mask is now in fact shifted left for SRL, so reverse the
1170       // decision.
1171       //   ((x & 0xAAAAAAAA) >> 1)
1172       //   ((x & 0x55555555) << 1)
1173       SHLExpMask = !SHLExpMask;
1174     } else {
1175       // Use a default shifted mask of all-ones if there's no AND, truncated
1176       // down to the expected width. This simplifies the logic later on.
1177       Mask = maskTrailingOnes<uint64_t>(Width);
1178       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
1179     }
1180   }
1181 
1182   if (SHLExpMask)
1183     ExpMask <<= ShAmt;
1184 
1185   if (Mask != ExpMask)
1186     return None;
1187 
1188   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
1189 }
1190 
1191 // Match the following pattern as a GREVI(W) operation
1192 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
1193 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1194                                const RISCVSubtarget &Subtarget) {
1195   if (Op.getSimpleValueType() == Subtarget.getXLenVT() ||
1196       (Subtarget.is64Bit() && Op.getSimpleValueType() == MVT::i32)) {
1197     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1198     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1199     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1200       SDLoc DL(Op);
1201       return DAG.getNode(
1202           RISCVISD::GREVI, DL, Op.getValueType(), LHS->Op,
1203           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1204     }
1205   }
1206   return SDValue();
1207 }
1208 
1209 // Matches any the following pattern as a GORCI(W) operation
1210 // 1.  (or (GREVI x, shamt), x)
1211 // 2.  (or x, (GREVI x, shamt))
1212 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
1213 // Note that with the variant of 3.,
1214 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
1215 // the inner pattern will first be matched as GREVI and then the outer
1216 // pattern will be matched to GORC via the first rule above.
1217 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
1218                                const RISCVSubtarget &Subtarget) {
1219   if (Op.getSimpleValueType() == Subtarget.getXLenVT() ||
1220       (Subtarget.is64Bit() && Op.getSimpleValueType() == MVT::i32)) {
1221     SDLoc DL(Op);
1222     SDValue Op0 = Op.getOperand(0);
1223     SDValue Op1 = Op.getOperand(1);
1224 
1225     // Check for either commutable permutation of (or (GREVI x, shamt), x)
1226     for (const auto &OpPair :
1227          {std::make_pair(Op0, Op1), std::make_pair(Op1, Op0)}) {
1228       if (OpPair.first.getOpcode() == RISCVISD::GREVI &&
1229           OpPair.first.getOperand(0) == OpPair.second)
1230         return DAG.getNode(RISCVISD::GORCI, DL, Op.getValueType(),
1231                            OpPair.second, OpPair.first.getOperand(1));
1232     }
1233 
1234     // OR is commutable so canonicalize its OR operand to the left
1235     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
1236       std::swap(Op0, Op1);
1237     if (Op0.getOpcode() != ISD::OR)
1238       return SDValue();
1239     SDValue OrOp0 = Op0.getOperand(0);
1240     SDValue OrOp1 = Op0.getOperand(1);
1241     auto LHS = matchRISCVBitmanipPat(OrOp0);
1242     // OR is commutable so swap the operands and try again: x might have been
1243     // on the left
1244     if (!LHS) {
1245       std::swap(OrOp0, OrOp1);
1246       LHS = matchRISCVBitmanipPat(OrOp0);
1247     }
1248     auto RHS = matchRISCVBitmanipPat(Op1);
1249     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
1250       return DAG.getNode(
1251           RISCVISD::GORCI, DL, Op.getValueType(), LHS->Op,
1252           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1253     }
1254   }
1255   return SDValue();
1256 }
1257 
1258 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1259                                                DAGCombinerInfo &DCI) const {
1260   SelectionDAG &DAG = DCI.DAG;
1261 
1262   switch (N->getOpcode()) {
1263   default:
1264     break;
1265   case RISCVISD::SplitF64: {
1266     SDValue Op0 = N->getOperand(0);
1267     // If the input to SplitF64 is just BuildPairF64 then the operation is
1268     // redundant. Instead, use BuildPairF64's operands directly.
1269     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
1270       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
1271 
1272     SDLoc DL(N);
1273 
1274     // It's cheaper to materialise two 32-bit integers than to load a double
1275     // from the constant pool and transfer it to integer registers through the
1276     // stack.
1277     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
1278       APInt V = C->getValueAPF().bitcastToAPInt();
1279       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
1280       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
1281       return DCI.CombineTo(N, Lo, Hi);
1282     }
1283 
1284     // This is a target-specific version of a DAGCombine performed in
1285     // DAGCombiner::visitBITCAST. It performs the equivalent of:
1286     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
1287     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
1288     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
1289         !Op0.getNode()->hasOneUse())
1290       break;
1291     SDValue NewSplitF64 =
1292         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
1293                     Op0.getOperand(0));
1294     SDValue Lo = NewSplitF64.getValue(0);
1295     SDValue Hi = NewSplitF64.getValue(1);
1296     APInt SignBit = APInt::getSignMask(32);
1297     if (Op0.getOpcode() == ISD::FNEG) {
1298       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
1299                                   DAG.getConstant(SignBit, DL, MVT::i32));
1300       return DCI.CombineTo(N, Lo, NewHi);
1301     }
1302     assert(Op0.getOpcode() == ISD::FABS);
1303     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
1304                                 DAG.getConstant(~SignBit, DL, MVT::i32));
1305     return DCI.CombineTo(N, Lo, NewHi);
1306   }
1307   case RISCVISD::SLLW:
1308   case RISCVISD::SRAW:
1309   case RISCVISD::SRLW:
1310   case RISCVISD::ROLW:
1311   case RISCVISD::RORW: {
1312     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
1313     SDValue LHS = N->getOperand(0);
1314     SDValue RHS = N->getOperand(1);
1315     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
1316     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
1317     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
1318         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
1319       if (N->getOpcode() != ISD::DELETED_NODE)
1320         DCI.AddToWorklist(N);
1321       return SDValue(N, 0);
1322     }
1323     break;
1324   }
1325   case RISCVISD::GREVIW:
1326   case RISCVISD::GORCIW: {
1327     // Only the lower 32 bits of the first operand are read
1328     SDValue Op0 = N->getOperand(0);
1329     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
1330     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
1331       if (N->getOpcode() != ISD::DELETED_NODE)
1332         DCI.AddToWorklist(N);
1333       return SDValue(N, 0);
1334     }
1335     break;
1336   }
1337   case RISCVISD::FMV_X_ANYEXTW_RV64: {
1338     SDLoc DL(N);
1339     SDValue Op0 = N->getOperand(0);
1340     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
1341     // conversion is unnecessary and can be replaced with an ANY_EXTEND
1342     // of the FMV_W_X_RV64 operand.
1343     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
1344       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
1345              "Unexpected value type!");
1346       return Op0.getOperand(0);
1347     }
1348 
1349     // This is a target-specific version of a DAGCombine performed in
1350     // DAGCombiner::visitBITCAST. It performs the equivalent of:
1351     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
1352     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
1353     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
1354         !Op0.getNode()->hasOneUse())
1355       break;
1356     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
1357                                  Op0.getOperand(0));
1358     APInt SignBit = APInt::getSignMask(32).sext(64);
1359     if (Op0.getOpcode() == ISD::FNEG)
1360       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
1361                          DAG.getConstant(SignBit, DL, MVT::i64));
1362 
1363     assert(Op0.getOpcode() == ISD::FABS);
1364     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
1365                        DAG.getConstant(~SignBit, DL, MVT::i64));
1366   }
1367   case RISCVISD::GREVI: {
1368     // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
1369     // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
1370     SDLoc DL(N);
1371     auto GREVSrc = N->getOperand(0);
1372     uint64_t ShAmt1 = N->getConstantOperandVal(1);
1373     if (GREVSrc->getOpcode() != RISCVISD::GREVI)
1374       break;
1375     uint64_t ShAmt2 = GREVSrc.getConstantOperandVal(1);
1376     GREVSrc = GREVSrc->getOperand(0);
1377     uint64_t CombinedShAmt = ShAmt1 ^ ShAmt2;
1378     if (CombinedShAmt == 0)
1379       return GREVSrc;
1380     return DAG.getNode(
1381         RISCVISD::GREVI, DL, N->getValueType(0), GREVSrc,
1382         DAG.getTargetConstant(CombinedShAmt, DL, Subtarget.getXLenVT()));
1383   }
1384   case ISD::OR:
1385     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
1386       return GREV;
1387     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
1388       return GORC;
1389     break;
1390   }
1391 
1392   return SDValue();
1393 }
1394 
1395 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1396     const SDNode *N, CombineLevel Level) const {
1397   // The following folds are only desirable if `(OP _, c1 << c2)` can be
1398   // materialised in fewer instructions than `(OP _, c1)`:
1399   //
1400   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1401   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1402   SDValue N0 = N->getOperand(0);
1403   EVT Ty = N0.getValueType();
1404   if (Ty.isScalarInteger() &&
1405       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
1406     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1407     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1408     if (C1 && C2) {
1409       APInt C1Int = C1->getAPIntValue();
1410       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
1411 
1412       // We can materialise `c1 << c2` into an add immediate, so it's "free",
1413       // and the combine should happen, to potentially allow further combines
1414       // later.
1415       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
1416           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
1417         return true;
1418 
1419       // We can materialise `c1` in an add immediate, so it's "free", and the
1420       // combine should be prevented.
1421       if (C1Int.getMinSignedBits() <= 64 &&
1422           isLegalAddImmediate(C1Int.getSExtValue()))
1423         return false;
1424 
1425       // Neither constant will fit into an immediate, so find materialisation
1426       // costs.
1427       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
1428                                               Subtarget.is64Bit());
1429       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
1430           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
1431 
1432       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
1433       // combine should be prevented.
1434       if (C1Cost < ShiftedC1Cost)
1435         return false;
1436     }
1437   }
1438   return true;
1439 }
1440 
1441 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
1442     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1443     unsigned Depth) const {
1444   switch (Op.getOpcode()) {
1445   default:
1446     break;
1447   case RISCVISD::SLLW:
1448   case RISCVISD::SRAW:
1449   case RISCVISD::SRLW:
1450   case RISCVISD::DIVW:
1451   case RISCVISD::DIVUW:
1452   case RISCVISD::REMUW:
1453   case RISCVISD::ROLW:
1454   case RISCVISD::RORW:
1455   case RISCVISD::GREVIW:
1456   case RISCVISD::GORCIW:
1457     // TODO: As the result is sign-extended, this is conservatively correct. A
1458     // more precise answer could be calculated for SRAW depending on known
1459     // bits in the shift amount.
1460     return 33;
1461   }
1462 
1463   return 1;
1464 }
1465 
1466 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
1467                                                   MachineBasicBlock *BB) {
1468   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
1469 
1470   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
1471   // Should the count have wrapped while it was being read, we need to try
1472   // again.
1473   // ...
1474   // read:
1475   // rdcycleh x3 # load high word of cycle
1476   // rdcycle  x2 # load low word of cycle
1477   // rdcycleh x4 # load high word of cycle
1478   // bne x3, x4, read # check if high word reads match, otherwise try again
1479   // ...
1480 
1481   MachineFunction &MF = *BB->getParent();
1482   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1483   MachineFunction::iterator It = ++BB->getIterator();
1484 
1485   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1486   MF.insert(It, LoopMBB);
1487 
1488   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1489   MF.insert(It, DoneMBB);
1490 
1491   // Transfer the remainder of BB and its successor edges to DoneMBB.
1492   DoneMBB->splice(DoneMBB->begin(), BB,
1493                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
1494   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
1495 
1496   BB->addSuccessor(LoopMBB);
1497 
1498   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1499   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1500   Register LoReg = MI.getOperand(0).getReg();
1501   Register HiReg = MI.getOperand(1).getReg();
1502   DebugLoc DL = MI.getDebugLoc();
1503 
1504   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1505   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
1506       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1507       .addReg(RISCV::X0);
1508   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
1509       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
1510       .addReg(RISCV::X0);
1511   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
1512       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1513       .addReg(RISCV::X0);
1514 
1515   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
1516       .addReg(HiReg)
1517       .addReg(ReadAgainReg)
1518       .addMBB(LoopMBB);
1519 
1520   LoopMBB->addSuccessor(LoopMBB);
1521   LoopMBB->addSuccessor(DoneMBB);
1522 
1523   MI.eraseFromParent();
1524 
1525   return DoneMBB;
1526 }
1527 
1528 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
1529                                              MachineBasicBlock *BB) {
1530   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
1531 
1532   MachineFunction &MF = *BB->getParent();
1533   DebugLoc DL = MI.getDebugLoc();
1534   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1535   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1536   Register LoReg = MI.getOperand(0).getReg();
1537   Register HiReg = MI.getOperand(1).getReg();
1538   Register SrcReg = MI.getOperand(2).getReg();
1539   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
1540   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
1541 
1542   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
1543                           RI);
1544   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
1545   MachineMemOperand *MMOLo =
1546       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
1547   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
1548       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
1549   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
1550       .addFrameIndex(FI)
1551       .addImm(0)
1552       .addMemOperand(MMOLo);
1553   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
1554       .addFrameIndex(FI)
1555       .addImm(4)
1556       .addMemOperand(MMOHi);
1557   MI.eraseFromParent(); // The pseudo instruction is gone now.
1558   return BB;
1559 }
1560 
1561 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
1562                                                  MachineBasicBlock *BB) {
1563   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
1564          "Unexpected instruction");
1565 
1566   MachineFunction &MF = *BB->getParent();
1567   DebugLoc DL = MI.getDebugLoc();
1568   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1569   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1570   Register DstReg = MI.getOperand(0).getReg();
1571   Register LoReg = MI.getOperand(1).getReg();
1572   Register HiReg = MI.getOperand(2).getReg();
1573   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
1574   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
1575 
1576   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
1577   MachineMemOperand *MMOLo =
1578       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
1579   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
1580       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
1581   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1582       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
1583       .addFrameIndex(FI)
1584       .addImm(0)
1585       .addMemOperand(MMOLo);
1586   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1587       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
1588       .addFrameIndex(FI)
1589       .addImm(4)
1590       .addMemOperand(MMOHi);
1591   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
1592   MI.eraseFromParent(); // The pseudo instruction is gone now.
1593   return BB;
1594 }
1595 
1596 static bool isSelectPseudo(MachineInstr &MI) {
1597   switch (MI.getOpcode()) {
1598   default:
1599     return false;
1600   case RISCV::Select_GPR_Using_CC_GPR:
1601   case RISCV::Select_FPR32_Using_CC_GPR:
1602   case RISCV::Select_FPR64_Using_CC_GPR:
1603     return true;
1604   }
1605 }
1606 
1607 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
1608                                            MachineBasicBlock *BB) {
1609   // To "insert" Select_* instructions, we actually have to insert the triangle
1610   // control-flow pattern.  The incoming instructions know the destination vreg
1611   // to set, the condition code register to branch on, the true/false values to
1612   // select between, and the condcode to use to select the appropriate branch.
1613   //
1614   // We produce the following control flow:
1615   //     HeadMBB
1616   //     |  \
1617   //     |  IfFalseMBB
1618   //     | /
1619   //    TailMBB
1620   //
1621   // When we find a sequence of selects we attempt to optimize their emission
1622   // by sharing the control flow. Currently we only handle cases where we have
1623   // multiple selects with the exact same condition (same LHS, RHS and CC).
1624   // The selects may be interleaved with other instructions if the other
1625   // instructions meet some requirements we deem safe:
1626   // - They are debug instructions. Otherwise,
1627   // - They do not have side-effects, do not access memory and their inputs do
1628   //   not depend on the results of the select pseudo-instructions.
1629   // The TrueV/FalseV operands of the selects cannot depend on the result of
1630   // previous selects in the sequence.
1631   // These conditions could be further relaxed. See the X86 target for a
1632   // related approach and more information.
1633   Register LHS = MI.getOperand(1).getReg();
1634   Register RHS = MI.getOperand(2).getReg();
1635   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
1636 
1637   SmallVector<MachineInstr *, 4> SelectDebugValues;
1638   SmallSet<Register, 4> SelectDests;
1639   SelectDests.insert(MI.getOperand(0).getReg());
1640 
1641   MachineInstr *LastSelectPseudo = &MI;
1642 
1643   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
1644        SequenceMBBI != E; ++SequenceMBBI) {
1645     if (SequenceMBBI->isDebugInstr())
1646       continue;
1647     else if (isSelectPseudo(*SequenceMBBI)) {
1648       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
1649           SequenceMBBI->getOperand(2).getReg() != RHS ||
1650           SequenceMBBI->getOperand(3).getImm() != CC ||
1651           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
1652           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
1653         break;
1654       LastSelectPseudo = &*SequenceMBBI;
1655       SequenceMBBI->collectDebugValues(SelectDebugValues);
1656       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
1657     } else {
1658       if (SequenceMBBI->hasUnmodeledSideEffects() ||
1659           SequenceMBBI->mayLoadOrStore())
1660         break;
1661       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
1662             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
1663           }))
1664         break;
1665     }
1666   }
1667 
1668   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1669   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1670   DebugLoc DL = MI.getDebugLoc();
1671   MachineFunction::iterator I = ++BB->getIterator();
1672 
1673   MachineBasicBlock *HeadMBB = BB;
1674   MachineFunction *F = BB->getParent();
1675   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
1676   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
1677 
1678   F->insert(I, IfFalseMBB);
1679   F->insert(I, TailMBB);
1680 
1681   // Transfer debug instructions associated with the selects to TailMBB.
1682   for (MachineInstr *DebugInstr : SelectDebugValues) {
1683     TailMBB->push_back(DebugInstr->removeFromParent());
1684   }
1685 
1686   // Move all instructions after the sequence to TailMBB.
1687   TailMBB->splice(TailMBB->end(), HeadMBB,
1688                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
1689   // Update machine-CFG edges by transferring all successors of the current
1690   // block to the new block which will contain the Phi nodes for the selects.
1691   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
1692   // Set the successors for HeadMBB.
1693   HeadMBB->addSuccessor(IfFalseMBB);
1694   HeadMBB->addSuccessor(TailMBB);
1695 
1696   // Insert appropriate branch.
1697   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
1698 
1699   BuildMI(HeadMBB, DL, TII.get(Opcode))
1700     .addReg(LHS)
1701     .addReg(RHS)
1702     .addMBB(TailMBB);
1703 
1704   // IfFalseMBB just falls through to TailMBB.
1705   IfFalseMBB->addSuccessor(TailMBB);
1706 
1707   // Create PHIs for all of the select pseudo-instructions.
1708   auto SelectMBBI = MI.getIterator();
1709   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
1710   auto InsertionPoint = TailMBB->begin();
1711   while (SelectMBBI != SelectEnd) {
1712     auto Next = std::next(SelectMBBI);
1713     if (isSelectPseudo(*SelectMBBI)) {
1714       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
1715       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
1716               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1717           .addReg(SelectMBBI->getOperand(4).getReg())
1718           .addMBB(HeadMBB)
1719           .addReg(SelectMBBI->getOperand(5).getReg())
1720           .addMBB(IfFalseMBB);
1721       SelectMBBI->eraseFromParent();
1722     }
1723     SelectMBBI = Next;
1724   }
1725 
1726   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
1727   return TailMBB;
1728 }
1729 
1730 MachineBasicBlock *
1731 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1732                                                  MachineBasicBlock *BB) const {
1733   switch (MI.getOpcode()) {
1734   default:
1735     llvm_unreachable("Unexpected instr type to insert");
1736   case RISCV::ReadCycleWide:
1737     assert(!Subtarget.is64Bit() &&
1738            "ReadCycleWrite is only to be used on riscv32");
1739     return emitReadCycleWidePseudo(MI, BB);
1740   case RISCV::Select_GPR_Using_CC_GPR:
1741   case RISCV::Select_FPR32_Using_CC_GPR:
1742   case RISCV::Select_FPR64_Using_CC_GPR:
1743     return emitSelectPseudo(MI, BB);
1744   case RISCV::BuildPairF64Pseudo:
1745     return emitBuildPairF64Pseudo(MI, BB);
1746   case RISCV::SplitF64Pseudo:
1747     return emitSplitF64Pseudo(MI, BB);
1748   }
1749 }
1750 
1751 // Calling Convention Implementation.
1752 // The expectations for frontend ABI lowering vary from target to target.
1753 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
1754 // details, but this is a longer term goal. For now, we simply try to keep the
1755 // role of the frontend as simple and well-defined as possible. The rules can
1756 // be summarised as:
1757 // * Never split up large scalar arguments. We handle them here.
1758 // * If a hardfloat calling convention is being used, and the struct may be
1759 // passed in a pair of registers (fp+fp, int+fp), and both registers are
1760 // available, then pass as two separate arguments. If either the GPRs or FPRs
1761 // are exhausted, then pass according to the rule below.
1762 // * If a struct could never be passed in registers or directly in a stack
1763 // slot (as it is larger than 2*XLEN and the floating point rules don't
1764 // apply), then pass it using a pointer with the byval attribute.
1765 // * If a struct is less than 2*XLEN, then coerce to either a two-element
1766 // word-sized array or a 2*XLEN scalar (depending on alignment).
1767 // * The frontend can determine whether a struct is returned by reference or
1768 // not based on its size and fields. If it will be returned by reference, the
1769 // frontend must modify the prototype so a pointer with the sret annotation is
1770 // passed as the first argument. This is not necessary for large scalar
1771 // returns.
1772 // * Struct return values and varargs should be coerced to structs containing
1773 // register-size fields in the same situations they would be for fixed
1774 // arguments.
1775 
1776 static const MCPhysReg ArgGPRs[] = {
1777   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
1778   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
1779 };
1780 static const MCPhysReg ArgFPR32s[] = {
1781   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
1782   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
1783 };
1784 static const MCPhysReg ArgFPR64s[] = {
1785   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
1786   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
1787 };
1788 
1789 // Pass a 2*XLEN argument that has been split into two XLEN values through
1790 // registers or the stack as necessary.
1791 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
1792                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
1793                                 MVT ValVT2, MVT LocVT2,
1794                                 ISD::ArgFlagsTy ArgFlags2) {
1795   unsigned XLenInBytes = XLen / 8;
1796   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1797     // At least one half can be passed via register.
1798     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1799                                      VA1.getLocVT(), CCValAssign::Full));
1800   } else {
1801     // Both halves must be passed on the stack, with proper alignment.
1802     Align StackAlign =
1803         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1804     State.addLoc(
1805         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1806                             State.AllocateStack(XLenInBytes, StackAlign),
1807                             VA1.getLocVT(), CCValAssign::Full));
1808     State.addLoc(CCValAssign::getMem(
1809         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
1810         LocVT2, CCValAssign::Full));
1811     return false;
1812   }
1813 
1814   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1815     // The second half can also be passed via register.
1816     State.addLoc(
1817         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1818   } else {
1819     // The second half is passed via the stack, without additional alignment.
1820     State.addLoc(CCValAssign::getMem(
1821         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
1822         LocVT2, CCValAssign::Full));
1823   }
1824 
1825   return false;
1826 }
1827 
1828 // Implements the RISC-V calling convention. Returns true upon failure.
1829 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1830                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1831                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1832                      bool IsRet, Type *OrigTy) {
1833   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
1834   assert(XLen == 32 || XLen == 64);
1835   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
1836 
1837   // Any return value split in to more than two values can't be returned
1838   // directly.
1839   if (IsRet && ValNo > 1)
1840     return true;
1841 
1842   // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
1843   // variadic argument, or if no F32 argument registers are available.
1844   bool UseGPRForF32 = true;
1845   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
1846   // variadic argument, or if no F64 argument registers are available.
1847   bool UseGPRForF64 = true;
1848 
1849   switch (ABI) {
1850   default:
1851     llvm_unreachable("Unexpected ABI");
1852   case RISCVABI::ABI_ILP32:
1853   case RISCVABI::ABI_LP64:
1854     break;
1855   case RISCVABI::ABI_ILP32F:
1856   case RISCVABI::ABI_LP64F:
1857     UseGPRForF32 = !IsFixed;
1858     break;
1859   case RISCVABI::ABI_ILP32D:
1860   case RISCVABI::ABI_LP64D:
1861     UseGPRForF32 = !IsFixed;
1862     UseGPRForF64 = !IsFixed;
1863     break;
1864   }
1865 
1866   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
1867     UseGPRForF32 = true;
1868   if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
1869     UseGPRForF64 = true;
1870 
1871   // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
1872   // variables rather than directly checking against the target ABI.
1873 
1874   if (UseGPRForF32 && ValVT == MVT::f32) {
1875     LocVT = XLenVT;
1876     LocInfo = CCValAssign::BCvt;
1877   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
1878     LocVT = MVT::i64;
1879     LocInfo = CCValAssign::BCvt;
1880   }
1881 
1882   // If this is a variadic argument, the RISC-V calling convention requires
1883   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
1884   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
1885   // be used regardless of whether the original argument was split during
1886   // legalisation or not. The argument will not be passed by registers if the
1887   // original type is larger than 2*XLEN, so the register alignment rule does
1888   // not apply.
1889   unsigned TwoXLenInBytes = (2 * XLen) / 8;
1890   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
1891       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1892     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1893     // Skip 'odd' register if necessary.
1894     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
1895       State.AllocateReg(ArgGPRs);
1896   }
1897 
1898   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1899   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1900       State.getPendingArgFlags();
1901 
1902   assert(PendingLocs.size() == PendingArgFlags.size() &&
1903          "PendingLocs and PendingArgFlags out of sync");
1904 
1905   // Handle passing f64 on RV32D with a soft float ABI or when floating point
1906   // registers are exhausted.
1907   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1908     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1909            "Can't lower f64 if it is split");
1910     // Depending on available argument GPRS, f64 may be passed in a pair of
1911     // GPRs, split between a GPR and the stack, or passed completely on the
1912     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
1913     // cases.
1914     Register Reg = State.AllocateReg(ArgGPRs);
1915     LocVT = MVT::i32;
1916     if (!Reg) {
1917       unsigned StackOffset = State.AllocateStack(8, Align(8));
1918       State.addLoc(
1919           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1920       return false;
1921     }
1922     if (!State.AllocateReg(ArgGPRs))
1923       State.AllocateStack(4, Align(4));
1924     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1925     return false;
1926   }
1927 
1928   // Split arguments might be passed indirectly, so keep track of the pending
1929   // values.
1930   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
1931     LocVT = XLenVT;
1932     LocInfo = CCValAssign::Indirect;
1933     PendingLocs.push_back(
1934         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1935     PendingArgFlags.push_back(ArgFlags);
1936     if (!ArgFlags.isSplitEnd()) {
1937       return false;
1938     }
1939   }
1940 
1941   // If the split argument only had two elements, it should be passed directly
1942   // in registers or on the stack.
1943   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1944     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1945     // Apply the normal calling convention rules to the first half of the
1946     // split argument.
1947     CCValAssign VA = PendingLocs[0];
1948     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1949     PendingLocs.clear();
1950     PendingArgFlags.clear();
1951     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1952                                ArgFlags);
1953   }
1954 
1955   // Allocate to a register if possible, or else a stack slot.
1956   Register Reg;
1957   if (ValVT == MVT::f32 && !UseGPRForF32)
1958     Reg = State.AllocateReg(ArgFPR32s);
1959   else if (ValVT == MVT::f64 && !UseGPRForF64)
1960     Reg = State.AllocateReg(ArgFPR64s);
1961   else
1962     Reg = State.AllocateReg(ArgGPRs);
1963   unsigned StackOffset =
1964       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
1965 
1966   // If we reach this point and PendingLocs is non-empty, we must be at the
1967   // end of a split argument that must be passed indirectly.
1968   if (!PendingLocs.empty()) {
1969     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1970     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1971 
1972     for (auto &It : PendingLocs) {
1973       if (Reg)
1974         It.convertToReg(Reg);
1975       else
1976         It.convertToMem(StackOffset);
1977       State.addLoc(It);
1978     }
1979     PendingLocs.clear();
1980     PendingArgFlags.clear();
1981     return false;
1982   }
1983 
1984   assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
1985          "Expected an XLenVT at this stage");
1986 
1987   if (Reg) {
1988     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1989     return false;
1990   }
1991 
1992   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1993   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1994     LocVT = ValVT;
1995     LocInfo = CCValAssign::Full;
1996   }
1997   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1998   return false;
1999 }
2000 
2001 void RISCVTargetLowering::analyzeInputArgs(
2002     MachineFunction &MF, CCState &CCInfo,
2003     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
2004   unsigned NumArgs = Ins.size();
2005   FunctionType *FType = MF.getFunction().getFunctionType();
2006 
2007   for (unsigned i = 0; i != NumArgs; ++i) {
2008     MVT ArgVT = Ins[i].VT;
2009     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
2010 
2011     Type *ArgTy = nullptr;
2012     if (IsRet)
2013       ArgTy = FType->getReturnType();
2014     else if (Ins[i].isOrigArg())
2015       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2016 
2017     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2018     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2019                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
2020       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2021                         << EVT(ArgVT).getEVTString() << '\n');
2022       llvm_unreachable(nullptr);
2023     }
2024   }
2025 }
2026 
2027 void RISCVTargetLowering::analyzeOutputArgs(
2028     MachineFunction &MF, CCState &CCInfo,
2029     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2030     CallLoweringInfo *CLI) const {
2031   unsigned NumArgs = Outs.size();
2032 
2033   for (unsigned i = 0; i != NumArgs; i++) {
2034     MVT ArgVT = Outs[i].VT;
2035     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2036     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2037 
2038     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2039     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2040                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
2041       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2042                         << EVT(ArgVT).getEVTString() << "\n");
2043       llvm_unreachable(nullptr);
2044     }
2045   }
2046 }
2047 
2048 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2049 // values.
2050 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2051                                    const CCValAssign &VA, const SDLoc &DL) {
2052   switch (VA.getLocInfo()) {
2053   default:
2054     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2055   case CCValAssign::Full:
2056     break;
2057   case CCValAssign::BCvt:
2058     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
2059       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
2060       break;
2061     }
2062     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2063     break;
2064   }
2065   return Val;
2066 }
2067 
2068 // The caller is responsible for loading the full value if the argument is
2069 // passed with CCValAssign::Indirect.
2070 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2071                                 const CCValAssign &VA, const SDLoc &DL) {
2072   MachineFunction &MF = DAG.getMachineFunction();
2073   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2074   EVT LocVT = VA.getLocVT();
2075   SDValue Val;
2076   const TargetRegisterClass *RC;
2077 
2078   switch (LocVT.getSimpleVT().SimpleTy) {
2079   default:
2080     llvm_unreachable("Unexpected register type");
2081   case MVT::i32:
2082   case MVT::i64:
2083     RC = &RISCV::GPRRegClass;
2084     break;
2085   case MVT::f32:
2086     RC = &RISCV::FPR32RegClass;
2087     break;
2088   case MVT::f64:
2089     RC = &RISCV::FPR64RegClass;
2090     break;
2091   }
2092 
2093   Register VReg = RegInfo.createVirtualRegister(RC);
2094   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2095   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2096 
2097   if (VA.getLocInfo() == CCValAssign::Indirect)
2098     return Val;
2099 
2100   return convertLocVTToValVT(DAG, Val, VA, DL);
2101 }
2102 
2103 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2104                                    const CCValAssign &VA, const SDLoc &DL) {
2105   EVT LocVT = VA.getLocVT();
2106 
2107   switch (VA.getLocInfo()) {
2108   default:
2109     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2110   case CCValAssign::Full:
2111     break;
2112   case CCValAssign::BCvt:
2113     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
2114       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
2115       break;
2116     }
2117     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2118     break;
2119   }
2120   return Val;
2121 }
2122 
2123 // The caller is responsible for loading the full value if the argument is
2124 // passed with CCValAssign::Indirect.
2125 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2126                                 const CCValAssign &VA, const SDLoc &DL) {
2127   MachineFunction &MF = DAG.getMachineFunction();
2128   MachineFrameInfo &MFI = MF.getFrameInfo();
2129   EVT LocVT = VA.getLocVT();
2130   EVT ValVT = VA.getValVT();
2131   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
2132   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2133                                  VA.getLocMemOffset(), /*Immutable=*/true);
2134   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2135   SDValue Val;
2136 
2137   ISD::LoadExtType ExtType;
2138   switch (VA.getLocInfo()) {
2139   default:
2140     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2141   case CCValAssign::Full:
2142   case CCValAssign::Indirect:
2143   case CCValAssign::BCvt:
2144     ExtType = ISD::NON_EXTLOAD;
2145     break;
2146   }
2147   Val = DAG.getExtLoad(
2148       ExtType, DL, LocVT, Chain, FIN,
2149       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2150   return Val;
2151 }
2152 
2153 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
2154                                        const CCValAssign &VA, const SDLoc &DL) {
2155   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
2156          "Unexpected VA");
2157   MachineFunction &MF = DAG.getMachineFunction();
2158   MachineFrameInfo &MFI = MF.getFrameInfo();
2159   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2160 
2161   if (VA.isMemLoc()) {
2162     // f64 is passed on the stack.
2163     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
2164     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2165     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
2166                        MachinePointerInfo::getFixedStack(MF, FI));
2167   }
2168 
2169   assert(VA.isRegLoc() && "Expected register VA assignment");
2170 
2171   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2172   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
2173   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
2174   SDValue Hi;
2175   if (VA.getLocReg() == RISCV::X17) {
2176     // Second half of f64 is passed on the stack.
2177     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
2178     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2179     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
2180                      MachinePointerInfo::getFixedStack(MF, FI));
2181   } else {
2182     // Second half of f64 is passed in another GPR.
2183     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2184     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
2185     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
2186   }
2187   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
2188 }
2189 
2190 // FastCC has less than 1% performance improvement for some particular
2191 // benchmark. But theoretically, it may has benenfit for some cases.
2192 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
2193                             CCValAssign::LocInfo LocInfo,
2194                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
2195 
2196   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2197     // X5 and X6 might be used for save-restore libcall.
2198     static const MCPhysReg GPRList[] = {
2199         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
2200         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
2201         RISCV::X29, RISCV::X30, RISCV::X31};
2202     if (unsigned Reg = State.AllocateReg(GPRList)) {
2203       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2204       return false;
2205     }
2206   }
2207 
2208   if (LocVT == MVT::f32) {
2209     static const MCPhysReg FPR32List[] = {
2210         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
2211         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
2212         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
2213         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
2214     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2215       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2216       return false;
2217     }
2218   }
2219 
2220   if (LocVT == MVT::f64) {
2221     static const MCPhysReg FPR64List[] = {
2222         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
2223         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
2224         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
2225         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
2226     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2227       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2228       return false;
2229     }
2230   }
2231 
2232   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
2233     unsigned Offset4 = State.AllocateStack(4, Align(4));
2234     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
2235     return false;
2236   }
2237 
2238   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
2239     unsigned Offset5 = State.AllocateStack(8, Align(8));
2240     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
2241     return false;
2242   }
2243 
2244   return true; // CC didn't match.
2245 }
2246 
2247 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2248                          CCValAssign::LocInfo LocInfo,
2249                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
2250 
2251   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2252     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
2253     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
2254     static const MCPhysReg GPRList[] = {
2255         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
2256         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
2257     if (unsigned Reg = State.AllocateReg(GPRList)) {
2258       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2259       return false;
2260     }
2261   }
2262 
2263   if (LocVT == MVT::f32) {
2264     // Pass in STG registers: F1, ..., F6
2265     //                        fs0 ... fs5
2266     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
2267                                           RISCV::F18_F, RISCV::F19_F,
2268                                           RISCV::F20_F, RISCV::F21_F};
2269     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2270       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2271       return false;
2272     }
2273   }
2274 
2275   if (LocVT == MVT::f64) {
2276     // Pass in STG registers: D1, ..., D6
2277     //                        fs6 ... fs11
2278     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
2279                                           RISCV::F24_D, RISCV::F25_D,
2280                                           RISCV::F26_D, RISCV::F27_D};
2281     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2282       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2283       return false;
2284     }
2285   }
2286 
2287   report_fatal_error("No registers left in GHC calling convention");
2288   return true;
2289 }
2290 
2291 // Transform physical registers into virtual registers.
2292 SDValue RISCVTargetLowering::LowerFormalArguments(
2293     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2294     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2295     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2296 
2297   MachineFunction &MF = DAG.getMachineFunction();
2298 
2299   switch (CallConv) {
2300   default:
2301     report_fatal_error("Unsupported calling convention");
2302   case CallingConv::C:
2303   case CallingConv::Fast:
2304     break;
2305   case CallingConv::GHC:
2306     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
2307         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
2308       report_fatal_error(
2309         "GHC calling convention requires the F and D instruction set extensions");
2310   }
2311 
2312   const Function &Func = MF.getFunction();
2313   if (Func.hasFnAttribute("interrupt")) {
2314     if (!Func.arg_empty())
2315       report_fatal_error(
2316         "Functions with the interrupt attribute cannot have arguments!");
2317 
2318     StringRef Kind =
2319       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2320 
2321     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
2322       report_fatal_error(
2323         "Function interrupt attribute argument not supported!");
2324   }
2325 
2326   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2327   MVT XLenVT = Subtarget.getXLenVT();
2328   unsigned XLenInBytes = Subtarget.getXLen() / 8;
2329   // Used with vargs to acumulate store chains.
2330   std::vector<SDValue> OutChains;
2331 
2332   // Assign locations to all of the incoming arguments.
2333   SmallVector<CCValAssign, 16> ArgLocs;
2334   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2335 
2336   if (CallConv == CallingConv::Fast)
2337     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
2338   else if (CallConv == CallingConv::GHC)
2339     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
2340   else
2341     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
2342 
2343   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2344     CCValAssign &VA = ArgLocs[i];
2345     SDValue ArgValue;
2346     // Passing f64 on RV32D with a soft float ABI must be handled as a special
2347     // case.
2348     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
2349       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
2350     else if (VA.isRegLoc())
2351       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
2352     else
2353       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2354 
2355     if (VA.getLocInfo() == CCValAssign::Indirect) {
2356       // If the original argument was split and passed by reference (e.g. i128
2357       // on RV32), we need to load all parts of it here (using the same
2358       // address).
2359       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2360                                    MachinePointerInfo()));
2361       unsigned ArgIndex = Ins[i].OrigArgIndex;
2362       assert(Ins[i].PartOffset == 0);
2363       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2364         CCValAssign &PartVA = ArgLocs[i + 1];
2365         unsigned PartOffset = Ins[i + 1].PartOffset;
2366         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2367                                       DAG.getIntPtrConstant(PartOffset, DL));
2368         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2369                                      MachinePointerInfo()));
2370         ++i;
2371       }
2372       continue;
2373     }
2374     InVals.push_back(ArgValue);
2375   }
2376 
2377   if (IsVarArg) {
2378     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
2379     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2380     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
2381     MachineFrameInfo &MFI = MF.getFrameInfo();
2382     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2383     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2384 
2385     // Offset of the first variable argument from stack pointer, and size of
2386     // the vararg save area. For now, the varargs save area is either zero or
2387     // large enough to hold a0-a7.
2388     int VaArgOffset, VarArgsSaveSize;
2389 
2390     // If all registers are allocated, then all varargs must be passed on the
2391     // stack and we don't need to save any argregs.
2392     if (ArgRegs.size() == Idx) {
2393       VaArgOffset = CCInfo.getNextStackOffset();
2394       VarArgsSaveSize = 0;
2395     } else {
2396       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
2397       VaArgOffset = -VarArgsSaveSize;
2398     }
2399 
2400     // Record the frame index of the first variable argument
2401     // which is a value necessary to VASTART.
2402     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2403     RVFI->setVarArgsFrameIndex(FI);
2404 
2405     // If saving an odd number of registers then create an extra stack slot to
2406     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
2407     // offsets to even-numbered registered remain 2*XLEN-aligned.
2408     if (Idx % 2) {
2409       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
2410       VarArgsSaveSize += XLenInBytes;
2411     }
2412 
2413     // Copy the integer registers that may have been used for passing varargs
2414     // to the vararg save area.
2415     for (unsigned I = Idx; I < ArgRegs.size();
2416          ++I, VaArgOffset += XLenInBytes) {
2417       const Register Reg = RegInfo.createVirtualRegister(RC);
2418       RegInfo.addLiveIn(ArgRegs[I], Reg);
2419       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2420       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2421       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2422       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2423                                    MachinePointerInfo::getFixedStack(MF, FI));
2424       cast<StoreSDNode>(Store.getNode())
2425           ->getMemOperand()
2426           ->setValue((Value *)nullptr);
2427       OutChains.push_back(Store);
2428     }
2429     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2430   }
2431 
2432   // All stores are grouped in one node to allow the matching between
2433   // the size of Ins and InVals. This only happens for vararg functions.
2434   if (!OutChains.empty()) {
2435     OutChains.push_back(Chain);
2436     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2437   }
2438 
2439   return Chain;
2440 }
2441 
2442 /// isEligibleForTailCallOptimization - Check whether the call is eligible
2443 /// for tail call optimization.
2444 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2445 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2446     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2447     const SmallVector<CCValAssign, 16> &ArgLocs) const {
2448 
2449   auto &Callee = CLI.Callee;
2450   auto CalleeCC = CLI.CallConv;
2451   auto &Outs = CLI.Outs;
2452   auto &Caller = MF.getFunction();
2453   auto CallerCC = Caller.getCallingConv();
2454 
2455   // Exception-handling functions need a special set of instructions to
2456   // indicate a return to the hardware. Tail-calling another function would
2457   // probably break this.
2458   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2459   // should be expanded as new function attributes are introduced.
2460   if (Caller.hasFnAttribute("interrupt"))
2461     return false;
2462 
2463   // Do not tail call opt if the stack is used to pass parameters.
2464   if (CCInfo.getNextStackOffset() != 0)
2465     return false;
2466 
2467   // Do not tail call opt if any parameters need to be passed indirectly.
2468   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2469   // passed indirectly. So the address of the value will be passed in a
2470   // register, or if not available, then the address is put on the stack. In
2471   // order to pass indirectly, space on the stack often needs to be allocated
2472   // in order to store the value. In this case the CCInfo.getNextStackOffset()
2473   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
2474   // are passed CCValAssign::Indirect.
2475   for (auto &VA : ArgLocs)
2476     if (VA.getLocInfo() == CCValAssign::Indirect)
2477       return false;
2478 
2479   // Do not tail call opt if either caller or callee uses struct return
2480   // semantics.
2481   auto IsCallerStructRet = Caller.hasStructRetAttr();
2482   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2483   if (IsCallerStructRet || IsCalleeStructRet)
2484     return false;
2485 
2486   // Externally-defined functions with weak linkage should not be
2487   // tail-called. The behaviour of branch instructions in this situation (as
2488   // used for tail calls) is implementation-defined, so we cannot rely on the
2489   // linker replacing the tail call with a return.
2490   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2491     const GlobalValue *GV = G->getGlobal();
2492     if (GV->hasExternalWeakLinkage())
2493       return false;
2494   }
2495 
2496   // The callee has to preserve all registers the caller needs to preserve.
2497   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2498   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2499   if (CalleeCC != CallerCC) {
2500     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2501     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2502       return false;
2503   }
2504 
2505   // Byval parameters hand the function a pointer directly into the stack area
2506   // we want to reuse during a tail call. Working around this *is* possible
2507   // but less efficient and uglier in LowerCall.
2508   for (auto &Arg : Outs)
2509     if (Arg.Flags.isByVal())
2510       return false;
2511 
2512   return true;
2513 }
2514 
2515 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2516 // and output parameter nodes.
2517 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2518                                        SmallVectorImpl<SDValue> &InVals) const {
2519   SelectionDAG &DAG = CLI.DAG;
2520   SDLoc &DL = CLI.DL;
2521   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2522   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2523   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2524   SDValue Chain = CLI.Chain;
2525   SDValue Callee = CLI.Callee;
2526   bool &IsTailCall = CLI.IsTailCall;
2527   CallingConv::ID CallConv = CLI.CallConv;
2528   bool IsVarArg = CLI.IsVarArg;
2529   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2530   MVT XLenVT = Subtarget.getXLenVT();
2531 
2532   MachineFunction &MF = DAG.getMachineFunction();
2533 
2534   // Analyze the operands of the call, assigning locations to each operand.
2535   SmallVector<CCValAssign, 16> ArgLocs;
2536   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2537 
2538   if (CallConv == CallingConv::Fast)
2539     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
2540   else if (CallConv == CallingConv::GHC)
2541     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
2542   else
2543     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
2544 
2545   // Check if it's really possible to do a tail call.
2546   if (IsTailCall)
2547     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2548 
2549   if (IsTailCall)
2550     ++NumTailCalls;
2551   else if (CLI.CB && CLI.CB->isMustTailCall())
2552     report_fatal_error("failed to perform tail call elimination on a call "
2553                        "site marked musttail");
2554 
2555   // Get a count of how many bytes are to be pushed on the stack.
2556   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2557 
2558   // Create local copies for byval args
2559   SmallVector<SDValue, 8> ByValArgs;
2560   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2561     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2562     if (!Flags.isByVal())
2563       continue;
2564 
2565     SDValue Arg = OutVals[i];
2566     unsigned Size = Flags.getByValSize();
2567     Align Alignment = Flags.getNonZeroByValAlign();
2568 
2569     int FI =
2570         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2571     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2572     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
2573 
2574     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2575                           /*IsVolatile=*/false,
2576                           /*AlwaysInline=*/false, IsTailCall,
2577                           MachinePointerInfo(), MachinePointerInfo());
2578     ByValArgs.push_back(FIPtr);
2579   }
2580 
2581   if (!IsTailCall)
2582     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2583 
2584   // Copy argument values to their designated locations.
2585   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2586   SmallVector<SDValue, 8> MemOpChains;
2587   SDValue StackPtr;
2588   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2589     CCValAssign &VA = ArgLocs[i];
2590     SDValue ArgValue = OutVals[i];
2591     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2592 
2593     // Handle passing f64 on RV32D with a soft float ABI as a special case.
2594     bool IsF64OnRV32DSoftABI =
2595         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
2596     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
2597       SDValue SplitF64 = DAG.getNode(
2598           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
2599       SDValue Lo = SplitF64.getValue(0);
2600       SDValue Hi = SplitF64.getValue(1);
2601 
2602       Register RegLo = VA.getLocReg();
2603       RegsToPass.push_back(std::make_pair(RegLo, Lo));
2604 
2605       if (RegLo == RISCV::X17) {
2606         // Second half of f64 is passed on the stack.
2607         // Work out the address of the stack slot.
2608         if (!StackPtr.getNode())
2609           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2610         // Emit the store.
2611         MemOpChains.push_back(
2612             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
2613       } else {
2614         // Second half of f64 is passed in another GPR.
2615         assert(RegLo < RISCV::X31 && "Invalid register pair");
2616         Register RegHigh = RegLo + 1;
2617         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
2618       }
2619       continue;
2620     }
2621 
2622     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
2623     // as any other MemLoc.
2624 
2625     // Promote the value if needed.
2626     // For now, only handle fully promoted and indirect arguments.
2627     if (VA.getLocInfo() == CCValAssign::Indirect) {
2628       // Store the argument in a stack slot and pass its address.
2629       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
2630       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2631       MemOpChains.push_back(
2632           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2633                        MachinePointerInfo::getFixedStack(MF, FI)));
2634       // If the original argument was split (e.g. i128), we need
2635       // to store all parts of it here (and pass just one address).
2636       unsigned ArgIndex = Outs[i].OrigArgIndex;
2637       assert(Outs[i].PartOffset == 0);
2638       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2639         SDValue PartValue = OutVals[i + 1];
2640         unsigned PartOffset = Outs[i + 1].PartOffset;
2641         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2642                                       DAG.getIntPtrConstant(PartOffset, DL));
2643         MemOpChains.push_back(
2644             DAG.getStore(Chain, DL, PartValue, Address,
2645                          MachinePointerInfo::getFixedStack(MF, FI)));
2646         ++i;
2647       }
2648       ArgValue = SpillSlot;
2649     } else {
2650       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2651     }
2652 
2653     // Use local copy if it is a byval arg.
2654     if (Flags.isByVal())
2655       ArgValue = ByValArgs[j++];
2656 
2657     if (VA.isRegLoc()) {
2658       // Queue up the argument copies and emit them at the end.
2659       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2660     } else {
2661       assert(VA.isMemLoc() && "Argument not register or memory");
2662       assert(!IsTailCall && "Tail call not allowed if stack is used "
2663                             "for passing parameters");
2664 
2665       // Work out the address of the stack slot.
2666       if (!StackPtr.getNode())
2667         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2668       SDValue Address =
2669           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2670                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2671 
2672       // Emit the store.
2673       MemOpChains.push_back(
2674           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2675     }
2676   }
2677 
2678   // Join the stores, which are independent of one another.
2679   if (!MemOpChains.empty())
2680     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2681 
2682   SDValue Glue;
2683 
2684   // Build a sequence of copy-to-reg nodes, chained and glued together.
2685   for (auto &Reg : RegsToPass) {
2686     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2687     Glue = Chain.getValue(1);
2688   }
2689 
2690   // Validate that none of the argument registers have been marked as
2691   // reserved, if so report an error. Do the same for the return address if this
2692   // is not a tailcall.
2693   validateCCReservedRegs(RegsToPass, MF);
2694   if (!IsTailCall &&
2695       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
2696     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2697         MF.getFunction(),
2698         "Return address register required, but has been reserved."});
2699 
2700   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2701   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2702   // split it and then direct call can be matched by PseudoCALL.
2703   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2704     const GlobalValue *GV = S->getGlobal();
2705 
2706     unsigned OpFlags = RISCVII::MO_CALL;
2707     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
2708       OpFlags = RISCVII::MO_PLT;
2709 
2710     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
2711   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2712     unsigned OpFlags = RISCVII::MO_CALL;
2713 
2714     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
2715                                                  nullptr))
2716       OpFlags = RISCVII::MO_PLT;
2717 
2718     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2719   }
2720 
2721   // The first call operand is the chain and the second is the target address.
2722   SmallVector<SDValue, 8> Ops;
2723   Ops.push_back(Chain);
2724   Ops.push_back(Callee);
2725 
2726   // Add argument registers to the end of the list so that they are
2727   // known live into the call.
2728   for (auto &Reg : RegsToPass)
2729     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2730 
2731   if (!IsTailCall) {
2732     // Add a register mask operand representing the call-preserved registers.
2733     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2734     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2735     assert(Mask && "Missing call preserved mask for calling convention");
2736     Ops.push_back(DAG.getRegisterMask(Mask));
2737   }
2738 
2739   // Glue the call to the argument copies, if any.
2740   if (Glue.getNode())
2741     Ops.push_back(Glue);
2742 
2743   // Emit the call.
2744   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2745 
2746   if (IsTailCall) {
2747     MF.getFrameInfo().setHasTailCall();
2748     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
2749   }
2750 
2751   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
2752   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2753   Glue = Chain.getValue(1);
2754 
2755   // Mark the end of the call, which is glued to the call itself.
2756   Chain = DAG.getCALLSEQ_END(Chain,
2757                              DAG.getConstant(NumBytes, DL, PtrVT, true),
2758                              DAG.getConstant(0, DL, PtrVT, true),
2759                              Glue, DL);
2760   Glue = Chain.getValue(1);
2761 
2762   // Assign locations to each value returned by this call.
2763   SmallVector<CCValAssign, 16> RVLocs;
2764   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2765   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
2766 
2767   // Copy all of the result registers out of their specified physreg.
2768   for (auto &VA : RVLocs) {
2769     // Copy the value out
2770     SDValue RetValue =
2771         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2772     // Glue the RetValue to the end of the call sequence
2773     Chain = RetValue.getValue(1);
2774     Glue = RetValue.getValue(2);
2775 
2776     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2777       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
2778       SDValue RetValue2 =
2779           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
2780       Chain = RetValue2.getValue(1);
2781       Glue = RetValue2.getValue(2);
2782       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
2783                              RetValue2);
2784     }
2785 
2786     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2787 
2788     InVals.push_back(RetValue);
2789   }
2790 
2791   return Chain;
2792 }
2793 
2794 bool RISCVTargetLowering::CanLowerReturn(
2795     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2796     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2797   SmallVector<CCValAssign, 16> RVLocs;
2798   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2799   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2800     MVT VT = Outs[i].VT;
2801     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2802     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2803     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
2804                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
2805       return false;
2806   }
2807   return true;
2808 }
2809 
2810 SDValue
2811 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2812                                  bool IsVarArg,
2813                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2814                                  const SmallVectorImpl<SDValue> &OutVals,
2815                                  const SDLoc &DL, SelectionDAG &DAG) const {
2816   const MachineFunction &MF = DAG.getMachineFunction();
2817   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2818 
2819   // Stores the assignment of the return value to a location.
2820   SmallVector<CCValAssign, 16> RVLocs;
2821 
2822   // Info about the registers and stack slot.
2823   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2824                  *DAG.getContext());
2825 
2826   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2827                     nullptr);
2828 
2829   if (CallConv == CallingConv::GHC && !RVLocs.empty())
2830     report_fatal_error("GHC functions return void only");
2831 
2832   SDValue Glue;
2833   SmallVector<SDValue, 4> RetOps(1, Chain);
2834 
2835   // Copy the result values into the output registers.
2836   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2837     SDValue Val = OutVals[i];
2838     CCValAssign &VA = RVLocs[i];
2839     assert(VA.isRegLoc() && "Can only return in registers!");
2840 
2841     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2842       // Handle returning f64 on RV32D with a soft float ABI.
2843       assert(VA.isRegLoc() && "Expected return via registers");
2844       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
2845                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
2846       SDValue Lo = SplitF64.getValue(0);
2847       SDValue Hi = SplitF64.getValue(1);
2848       Register RegLo = VA.getLocReg();
2849       assert(RegLo < RISCV::X31 && "Invalid register pair");
2850       Register RegHi = RegLo + 1;
2851 
2852       if (STI.isRegisterReservedByUser(RegLo) ||
2853           STI.isRegisterReservedByUser(RegHi))
2854         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2855             MF.getFunction(),
2856             "Return value register required, but has been reserved."});
2857 
2858       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
2859       Glue = Chain.getValue(1);
2860       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
2861       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
2862       Glue = Chain.getValue(1);
2863       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
2864     } else {
2865       // Handle a 'normal' return.
2866       Val = convertValVTToLocVT(DAG, Val, VA, DL);
2867       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2868 
2869       if (STI.isRegisterReservedByUser(VA.getLocReg()))
2870         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2871             MF.getFunction(),
2872             "Return value register required, but has been reserved."});
2873 
2874       // Guarantee that all emitted copies are stuck together.
2875       Glue = Chain.getValue(1);
2876       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2877     }
2878   }
2879 
2880   RetOps[0] = Chain; // Update chain.
2881 
2882   // Add the glue node if we have it.
2883   if (Glue.getNode()) {
2884     RetOps.push_back(Glue);
2885   }
2886 
2887   // Interrupt service routines use different return instructions.
2888   const Function &Func = DAG.getMachineFunction().getFunction();
2889   if (Func.hasFnAttribute("interrupt")) {
2890     if (!Func.getReturnType()->isVoidTy())
2891       report_fatal_error(
2892           "Functions with the interrupt attribute must have void return type!");
2893 
2894     MachineFunction &MF = DAG.getMachineFunction();
2895     StringRef Kind =
2896       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2897 
2898     unsigned RetOpc;
2899     if (Kind == "user")
2900       RetOpc = RISCVISD::URET_FLAG;
2901     else if (Kind == "supervisor")
2902       RetOpc = RISCVISD::SRET_FLAG;
2903     else
2904       RetOpc = RISCVISD::MRET_FLAG;
2905 
2906     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
2907   }
2908 
2909   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
2910 }
2911 
2912 void RISCVTargetLowering::validateCCReservedRegs(
2913     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
2914     MachineFunction &MF) const {
2915   const Function &F = MF.getFunction();
2916   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2917 
2918   if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
2919         return STI.isRegisterReservedByUser(Reg.first);
2920       }))
2921     F.getContext().diagnose(DiagnosticInfoUnsupported{
2922         F, "Argument register required, but has been reserved."});
2923 }
2924 
2925 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2926   return CI->isTailCall();
2927 }
2928 
2929 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
2930 #define NODE_NAME_CASE(NODE)                                                   \
2931   case RISCVISD::NODE:                                                         \
2932     return "RISCVISD::" #NODE;
2933   // clang-format off
2934   switch ((RISCVISD::NodeType)Opcode) {
2935   case RISCVISD::FIRST_NUMBER:
2936     break;
2937   NODE_NAME_CASE(RET_FLAG)
2938   NODE_NAME_CASE(URET_FLAG)
2939   NODE_NAME_CASE(SRET_FLAG)
2940   NODE_NAME_CASE(MRET_FLAG)
2941   NODE_NAME_CASE(CALL)
2942   NODE_NAME_CASE(SELECT_CC)
2943   NODE_NAME_CASE(BuildPairF64)
2944   NODE_NAME_CASE(SplitF64)
2945   NODE_NAME_CASE(TAIL)
2946   NODE_NAME_CASE(SLLW)
2947   NODE_NAME_CASE(SRAW)
2948   NODE_NAME_CASE(SRLW)
2949   NODE_NAME_CASE(DIVW)
2950   NODE_NAME_CASE(DIVUW)
2951   NODE_NAME_CASE(REMUW)
2952   NODE_NAME_CASE(ROLW)
2953   NODE_NAME_CASE(RORW)
2954   NODE_NAME_CASE(FMV_W_X_RV64)
2955   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
2956   NODE_NAME_CASE(READ_CYCLE_WIDE)
2957   NODE_NAME_CASE(GREVI)
2958   NODE_NAME_CASE(GREVIW)
2959   NODE_NAME_CASE(GORCI)
2960   NODE_NAME_CASE(GORCIW)
2961   }
2962   // clang-format on
2963   return nullptr;
2964 #undef NODE_NAME_CASE
2965 }
2966 
2967 /// getConstraintType - Given a constraint letter, return the type of
2968 /// constraint it is for this target.
2969 RISCVTargetLowering::ConstraintType
2970 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
2971   if (Constraint.size() == 1) {
2972     switch (Constraint[0]) {
2973     default:
2974       break;
2975     case 'f':
2976       return C_RegisterClass;
2977     case 'I':
2978     case 'J':
2979     case 'K':
2980       return C_Immediate;
2981     case 'A':
2982       return C_Memory;
2983     }
2984   }
2985   return TargetLowering::getConstraintType(Constraint);
2986 }
2987 
2988 std::pair<unsigned, const TargetRegisterClass *>
2989 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2990                                                   StringRef Constraint,
2991                                                   MVT VT) const {
2992   // First, see if this is a constraint that directly corresponds to a
2993   // RISCV register class.
2994   if (Constraint.size() == 1) {
2995     switch (Constraint[0]) {
2996     case 'r':
2997       return std::make_pair(0U, &RISCV::GPRRegClass);
2998     case 'f':
2999       if (Subtarget.hasStdExtF() && VT == MVT::f32)
3000         return std::make_pair(0U, &RISCV::FPR32RegClass);
3001       if (Subtarget.hasStdExtD() && VT == MVT::f64)
3002         return std::make_pair(0U, &RISCV::FPR64RegClass);
3003       break;
3004     default:
3005       break;
3006     }
3007   }
3008 
3009   // Clang will correctly decode the usage of register name aliases into their
3010   // official names. However, other frontends like `rustc` do not. This allows
3011   // users of these frontends to use the ABI names for registers in LLVM-style
3012   // register constraints.
3013   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
3014                                .Case("{zero}", RISCV::X0)
3015                                .Case("{ra}", RISCV::X1)
3016                                .Case("{sp}", RISCV::X2)
3017                                .Case("{gp}", RISCV::X3)
3018                                .Case("{tp}", RISCV::X4)
3019                                .Case("{t0}", RISCV::X5)
3020                                .Case("{t1}", RISCV::X6)
3021                                .Case("{t2}", RISCV::X7)
3022                                .Cases("{s0}", "{fp}", RISCV::X8)
3023                                .Case("{s1}", RISCV::X9)
3024                                .Case("{a0}", RISCV::X10)
3025                                .Case("{a1}", RISCV::X11)
3026                                .Case("{a2}", RISCV::X12)
3027                                .Case("{a3}", RISCV::X13)
3028                                .Case("{a4}", RISCV::X14)
3029                                .Case("{a5}", RISCV::X15)
3030                                .Case("{a6}", RISCV::X16)
3031                                .Case("{a7}", RISCV::X17)
3032                                .Case("{s2}", RISCV::X18)
3033                                .Case("{s3}", RISCV::X19)
3034                                .Case("{s4}", RISCV::X20)
3035                                .Case("{s5}", RISCV::X21)
3036                                .Case("{s6}", RISCV::X22)
3037                                .Case("{s7}", RISCV::X23)
3038                                .Case("{s8}", RISCV::X24)
3039                                .Case("{s9}", RISCV::X25)
3040                                .Case("{s10}", RISCV::X26)
3041                                .Case("{s11}", RISCV::X27)
3042                                .Case("{t3}", RISCV::X28)
3043                                .Case("{t4}", RISCV::X29)
3044                                .Case("{t5}", RISCV::X30)
3045                                .Case("{t6}", RISCV::X31)
3046                                .Default(RISCV::NoRegister);
3047   if (XRegFromAlias != RISCV::NoRegister)
3048     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
3049 
3050   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
3051   // TableGen record rather than the AsmName to choose registers for InlineAsm
3052   // constraints, plus we want to match those names to the widest floating point
3053   // register type available, manually select floating point registers here.
3054   //
3055   // The second case is the ABI name of the register, so that frontends can also
3056   // use the ABI names in register constraint lists.
3057   if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
3058     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
3059                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
3060                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
3061                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
3062                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
3063                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
3064                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
3065                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
3066                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
3067                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
3068                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
3069                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
3070                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
3071                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
3072                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
3073                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
3074                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
3075                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
3076                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
3077                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
3078                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
3079                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
3080                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
3081                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
3082                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
3083                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
3084                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
3085                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
3086                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
3087                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
3088                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
3089                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
3090                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
3091                         .Default(RISCV::NoRegister);
3092     if (FReg != RISCV::NoRegister) {
3093       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
3094       if (Subtarget.hasStdExtD()) {
3095         unsigned RegNo = FReg - RISCV::F0_F;
3096         unsigned DReg = RISCV::F0_D + RegNo;
3097         return std::make_pair(DReg, &RISCV::FPR64RegClass);
3098       }
3099       return std::make_pair(FReg, &RISCV::FPR32RegClass);
3100     }
3101   }
3102 
3103   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3104 }
3105 
3106 unsigned
3107 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3108   // Currently only support length 1 constraints.
3109   if (ConstraintCode.size() == 1) {
3110     switch (ConstraintCode[0]) {
3111     case 'A':
3112       return InlineAsm::Constraint_A;
3113     default:
3114       break;
3115     }
3116   }
3117 
3118   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
3119 }
3120 
3121 void RISCVTargetLowering::LowerAsmOperandForConstraint(
3122     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3123     SelectionDAG &DAG) const {
3124   // Currently only support length 1 constraints.
3125   if (Constraint.length() == 1) {
3126     switch (Constraint[0]) {
3127     case 'I':
3128       // Validate & create a 12-bit signed immediate operand.
3129       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3130         uint64_t CVal = C->getSExtValue();
3131         if (isInt<12>(CVal))
3132           Ops.push_back(
3133               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3134       }
3135       return;
3136     case 'J':
3137       // Validate & create an integer zero operand.
3138       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3139         if (C->getZExtValue() == 0)
3140           Ops.push_back(
3141               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
3142       return;
3143     case 'K':
3144       // Validate & create a 5-bit unsigned immediate operand.
3145       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3146         uint64_t CVal = C->getZExtValue();
3147         if (isUInt<5>(CVal))
3148           Ops.push_back(
3149               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3150       }
3151       return;
3152     default:
3153       break;
3154     }
3155   }
3156   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3157 }
3158 
3159 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
3160                                                    Instruction *Inst,
3161                                                    AtomicOrdering Ord) const {
3162   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
3163     return Builder.CreateFence(Ord);
3164   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
3165     return Builder.CreateFence(AtomicOrdering::Release);
3166   return nullptr;
3167 }
3168 
3169 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
3170                                                     Instruction *Inst,
3171                                                     AtomicOrdering Ord) const {
3172   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
3173     return Builder.CreateFence(AtomicOrdering::Acquire);
3174   return nullptr;
3175 }
3176 
3177 TargetLowering::AtomicExpansionKind
3178 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3179   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
3180   // point operations can't be used in an lr/sc sequence without breaking the
3181   // forward-progress guarantee.
3182   if (AI->isFloatingPointOperation())
3183     return AtomicExpansionKind::CmpXChg;
3184 
3185   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
3186   if (Size == 8 || Size == 16)
3187     return AtomicExpansionKind::MaskedIntrinsic;
3188   return AtomicExpansionKind::None;
3189 }
3190 
3191 static Intrinsic::ID
3192 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
3193   if (XLen == 32) {
3194     switch (BinOp) {
3195     default:
3196       llvm_unreachable("Unexpected AtomicRMW BinOp");
3197     case AtomicRMWInst::Xchg:
3198       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
3199     case AtomicRMWInst::Add:
3200       return Intrinsic::riscv_masked_atomicrmw_add_i32;
3201     case AtomicRMWInst::Sub:
3202       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
3203     case AtomicRMWInst::Nand:
3204       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
3205     case AtomicRMWInst::Max:
3206       return Intrinsic::riscv_masked_atomicrmw_max_i32;
3207     case AtomicRMWInst::Min:
3208       return Intrinsic::riscv_masked_atomicrmw_min_i32;
3209     case AtomicRMWInst::UMax:
3210       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
3211     case AtomicRMWInst::UMin:
3212       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
3213     }
3214   }
3215 
3216   if (XLen == 64) {
3217     switch (BinOp) {
3218     default:
3219       llvm_unreachable("Unexpected AtomicRMW BinOp");
3220     case AtomicRMWInst::Xchg:
3221       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
3222     case AtomicRMWInst::Add:
3223       return Intrinsic::riscv_masked_atomicrmw_add_i64;
3224     case AtomicRMWInst::Sub:
3225       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
3226     case AtomicRMWInst::Nand:
3227       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
3228     case AtomicRMWInst::Max:
3229       return Intrinsic::riscv_masked_atomicrmw_max_i64;
3230     case AtomicRMWInst::Min:
3231       return Intrinsic::riscv_masked_atomicrmw_min_i64;
3232     case AtomicRMWInst::UMax:
3233       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
3234     case AtomicRMWInst::UMin:
3235       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
3236     }
3237   }
3238 
3239   llvm_unreachable("Unexpected XLen\n");
3240 }
3241 
3242 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
3243     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
3244     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
3245   unsigned XLen = Subtarget.getXLen();
3246   Value *Ordering =
3247       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
3248   Type *Tys[] = {AlignedAddr->getType()};
3249   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
3250       AI->getModule(),
3251       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
3252 
3253   if (XLen == 64) {
3254     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
3255     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
3256     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
3257   }
3258 
3259   Value *Result;
3260 
3261   // Must pass the shift amount needed to sign extend the loaded value prior
3262   // to performing a signed comparison for min/max. ShiftAmt is the number of
3263   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
3264   // is the number of bits to left+right shift the value in order to
3265   // sign-extend.
3266   if (AI->getOperation() == AtomicRMWInst::Min ||
3267       AI->getOperation() == AtomicRMWInst::Max) {
3268     const DataLayout &DL = AI->getModule()->getDataLayout();
3269     unsigned ValWidth =
3270         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
3271     Value *SextShamt =
3272         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
3273     Result = Builder.CreateCall(LrwOpScwLoop,
3274                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
3275   } else {
3276     Result =
3277         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
3278   }
3279 
3280   if (XLen == 64)
3281     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
3282   return Result;
3283 }
3284 
3285 TargetLowering::AtomicExpansionKind
3286 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
3287     AtomicCmpXchgInst *CI) const {
3288   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
3289   if (Size == 8 || Size == 16)
3290     return AtomicExpansionKind::MaskedIntrinsic;
3291   return AtomicExpansionKind::None;
3292 }
3293 
3294 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
3295     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
3296     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
3297   unsigned XLen = Subtarget.getXLen();
3298   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
3299   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
3300   if (XLen == 64) {
3301     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
3302     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
3303     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
3304     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
3305   }
3306   Type *Tys[] = {AlignedAddr->getType()};
3307   Function *MaskedCmpXchg =
3308       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
3309   Value *Result = Builder.CreateCall(
3310       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
3311   if (XLen == 64)
3312     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
3313   return Result;
3314 }
3315 
3316 Register RISCVTargetLowering::getExceptionPointerRegister(
3317     const Constant *PersonalityFn) const {
3318   return RISCV::X10;
3319 }
3320 
3321 Register RISCVTargetLowering::getExceptionSelectorRegister(
3322     const Constant *PersonalityFn) const {
3323   return RISCV::X11;
3324 }
3325 
3326 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
3327   // Return false to suppress the unnecessary extensions if the LibCall
3328   // arguments or return value is f32 type for LP64 ABI.
3329   RISCVABI::ABI ABI = Subtarget.getTargetABI();
3330   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
3331     return false;
3332 
3333   return true;
3334 }
3335 
3336 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
3337                                                  SDValue C) const {
3338   // Check integral scalar types.
3339   if (VT.isScalarInteger()) {
3340     // Do not perform the transformation on riscv32 with the M extension.
3341     if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
3342       return false;
3343     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3344       if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
3345         return false;
3346       int64_t Imm = ConstNode->getSExtValue();
3347       if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
3348           isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
3349         return true;
3350     }
3351   }
3352 
3353   return false;
3354 }
3355 
3356 #define GET_REGISTER_MATCHER
3357 #include "RISCVGenAsmMatcher.inc"
3358 
3359 Register
3360 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3361                                        const MachineFunction &MF) const {
3362   Register Reg = MatchRegisterAltName(RegName);
3363   if (Reg == RISCV::NoRegister)
3364     Reg = MatchRegisterName(RegName);
3365   if (Reg == RISCV::NoRegister)
3366     report_fatal_error(
3367         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3368   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3369   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
3370     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3371                              StringRef(RegName) + "\"."));
3372   return Reg;
3373 }
3374