1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "Utils/RISCVMatInt.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "riscv-lower"
41 
42 STATISTIC(NumTailCalls, "Number of tail calls");
43 
// Constructor: configures type legality, operation actions, and register
// classes for the RISC-V target based on the enabled ISA extensions.
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  // RV32E (16-GPR embedded variant) has no codegen support yet.
  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // Downgrade hard-float ABIs to the corresponding soft-float ABI when the
  // required FP extension is missing, warning rather than erroring out.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
                "doesn't support the F instruction set extension (ignoring "
                          "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  // Reject any ABI not handled by the calling-convention code below.
  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  if (Subtarget.hasStdExtV()) {
    // Mask vectors always fit in a single vector register.
    addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass);

    // Data vectors are assigned the register class matching their LMUL:
    // VR for LMUL <= 1, VRM2/VRM4/VRM8 for grouped registers.
    addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);

    // FP vector types are only legal when the matching scalar FP extension
    // is present.
    if (Subtarget.hasStdExtZfh()) {
      addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtF()) {
      addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtD()) {
      addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  // Extending loads from i1 are promoted (to a wider extending load).
  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Varargs: VASTART needs the frame index of the vararg save area (custom);
  // the rest use the generic expansions.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Zbb provides sext.b/sext.h; without it, sign-extension-in-register of
  // sub-word types must be expanded (shift pairs).
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  // On RV64, i32 is not a legal type; custom-legalize the common arithmetic
  // and shift operations on it.
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  // Without the M extension there is no hardware multiply/divide.
  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  // Combined div+rem and widening-multiply nodes have no RISC-V encoding.
  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  // Rotates are native with Zbb/Zbp; otherwise expand them.
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  // With Zbp, BITREVERSE/BSWAP are custom-lowered (to GREVI, see
  // LowerOperation); otherwise BSWAP is expanded.
  if (Subtarget.hasStdExtZbp()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::BSWAP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  // Zbt provides funnel shifts and conditional-select instructions.
  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  // Condition codes and FP operations without native support; expanded for
  // each enabled scalar FP type below.
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  // Symbol addresses are lowered by hand (see lowerGlobalAddress etc.) so
  // the right MO_HI/MO_LO or PC-relative sequences are emitted.
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Atomics: native up to XLEN with the A extension; otherwise everything
  // goes through libcalls.
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    }

    for (auto VT : MVT::integer_scalable_vector_valuetypes()) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);
    }

    // We must custom-lower SPLAT_VECTOR vXi64 on RV32
    if (!Subtarget.is64Bit())
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    if (Subtarget.hasStdExtZfh()) {
      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtF()) {
      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
                      RISCVVMVTs::vfloat32m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtD()) {
      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }
  }

  // Function alignments.
  // With the C extension instructions may be 2-byte aligned; otherwise 4.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  // OR combines feed the GREVI/shuffle pattern matching for Zbp.
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}
440 
441 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
442                                             EVT VT) const {
443   if (!VT.isVector())
444     return getPointerTy(DL);
445   if (Subtarget.hasStdExtV())
446     return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
447   return VT.changeVectorElementTypeToInteger();
448 }
449 
// Describes the memory behaviour of RISC-V target intrinsics so the DAG
// builder can attach the right MachineMemOperand. Returns true when the
// intrinsic touches memory and Info has been filled in.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    // All masked AMO / cmpxchg intrinsics share one description: they both
    // read and write (volatile) through their first, pointer, argument with
    // 4-byte alignment.
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}
477 
478 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
479                                                 const AddrMode &AM, Type *Ty,
480                                                 unsigned AS,
481                                                 Instruction *I) const {
482   // No global is ever allowed as a base.
483   if (AM.BaseGV)
484     return false;
485 
486   // Require a 12-bit signed offset.
487   if (!isInt<12>(AM.BaseOffs))
488     return false;
489 
490   switch (AM.Scale) {
491   case 0: // "r+i" or just "i", depending on HasBaseReg.
492     break;
493   case 1:
494     if (!AM.HasBaseReg) // allow "r+i".
495       break;
496     return false; // disallow "r+r" or "r+r+i".
497   default:
498     return false;
499   }
500 
501   return true;
502 }
503 
504 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
505   return isInt<12>(Imm);
506 }
507 
508 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
509   return isInt<12>(Imm);
510 }
511 
512 // On RV32, 64-bit integers are split into their high and low parts and held
513 // in two different registers, so the trunc is free since the low register can
514 // just be used.
515 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
516   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
517     return false;
518   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
519   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
520   return (SrcBits == 64 && DestBits == 32);
521 }
522 
523 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
524   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
525       !SrcVT.isInteger() || !DstVT.isInteger())
526     return false;
527   unsigned SrcBits = SrcVT.getSizeInBits();
528   unsigned DestBits = DstVT.getSizeInBits();
529   return (SrcBits == 64 && DestBits == 32);
530 }
531 
532 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
533   // Zexts are free if they can be combined with a load.
534   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
535     EVT MemVT = LD->getMemoryVT();
536     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
537          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
538         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
539          LD->getExtensionType() == ISD::ZEXTLOAD))
540       return true;
541   }
542 
543   return TargetLowering::isZExtFree(Val, VT2);
544 }
545 
546 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
547   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
548 }
549 
// CTTZ is cheap to speculate only when Zbb is available (which makes it a
// legal operation; see the constructor's CTTZ handling).
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}
553 
// CTLZ is cheap to speculate only when Zbb is available (which makes it a
// legal operation; see the constructor's CTLZ handling).
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}
557 
558 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
559                                        bool ForCodeSize) const {
560   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
561     return false;
562   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
563     return false;
564   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
565     return false;
566   if (Imm.isNegZero())
567     return false;
568   return Imm.isZero();
569 }
570 
571 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
572   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
573          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
574          (VT == MVT::f64 && Subtarget.hasStdExtD());
575 }
576 
577 // Changes the condition code and swaps operands if necessary, so the SetCC
578 // operation matches one of the comparisons supported directly in the RISC-V
579 // ISA.
580 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
581   switch (CC) {
582   default:
583     break;
584   case ISD::SETGT:
585   case ISD::SETLE:
586   case ISD::SETUGT:
587   case ISD::SETULE:
588     CC = ISD::getSetCCSwappedOperands(CC);
589     std::swap(LHS, RHS);
590     break;
591   }
592 }
593 
594 // Return the RISC-V branch opcode that matches the given DAG integer
595 // condition code. The CondCode must be one of those supported by the RISC-V
596 // ISA (see normaliseSetCC).
597 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
598   switch (CC) {
599   default:
600     llvm_unreachable("Unsupported CondCode");
601   case ISD::SETEQ:
602     return RISCV::BEQ;
603   case ISD::SETNE:
604     return RISCV::BNE;
605   case ISD::SETLT:
606     return RISCV::BLT;
607   case ISD::SETGE:
608     return RISCV::BGE;
609   case ISD::SETULT:
610     return RISCV::BLTU;
611   case ISD::SETUGE:
612     return RISCV::BGEU;
613   }
614 }
615 
// Central dispatch for all operations the constructor marked Custom.
// Each case forwards to a dedicated lower* helper, except BITCAST and
// BSWAP/BITREVERSE which are lowered inline.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    // Only the FP<->int bitcasts set up in the constructor reach here:
    // i16 -> f16 (Zfh) and, on RV64 with F, i32 -> f32. Both are emitted as
    // an any-extend to XLEN followed by an fmv node.
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    // Any other source type: let the generic legalizer handle it.
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  }
}
691 
// getTargetNode overloads: wrap each symbol-like SDNode as its target-node
// form carrying the given relocation Flags, for use by getAddr below.
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  // Note: the offset is intentionally dropped (0); lowerGlobalAddress emits
  // it as a separate ADD.
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
696 
// Block-address variant; preserves the node's offset.
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
702 
// Constant-pool variant; preserves alignment and offset.
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
708 
// Jump-table variant; only the table index is needed.
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
713 
// Common address-materialization helper for all symbol kinds (global,
// block address, constant pool, jump table). Chooses the instruction
// sequence based on relocation model and code model; IsLocal selects
// direct PC-relative addressing over a GOT load under PIC.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
754 
755 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
756                                                 SelectionDAG &DAG) const {
757   SDLoc DL(Op);
758   EVT Ty = Op.getValueType();
759   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
760   int64_t Offset = N->getOffset();
761   MVT XLenVT = Subtarget.getXLenVT();
762 
763   const GlobalValue *GV = N->getGlobal();
764   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
765   SDValue Addr = getAddr(N, DAG, IsLocal);
766 
767   // In order to maximise the opportunity for common subexpression elimination,
768   // emit a separate ADD node for the global address offset instead of folding
769   // it in the global address node. Later peephole optimisations may choose to
770   // fold it back in when profitable.
771   if (Offset != 0)
772     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
773                        DAG.getConstant(Offset, DL, XLenVT));
774   return Addr;
775 }
776 
777 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
778                                                SelectionDAG &DAG) const {
779   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
780 
781   return getAddr(N, DAG);
782 }
783 
784 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
785                                                SelectionDAG &DAG) const {
786   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
787 
788   return getAddr(N, DAG);
789 }
790 
791 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
792                                             SelectionDAG &DAG) const {
793   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
794 
795   return getAddr(N, DAG);
796 }
797 
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  // Materialise the address of TLS symbol N for the "static" TLS models:
  // initial-exec when UseGOT is true, local-exec when UseGOT is false. Both
  // sequences end by adding the thread pointer (register tp, i.e. x4).
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  // lui carries the %tprel_hi relocation for the upper bits of the offset.
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  // PseudoAddTPRel adds tp and carries the %tprel_add(sym) operand —
  // presumably present to enable linker relaxation; see the RISC-V psABI.
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  // Finally add the low 12 bits of the offset via %tprel_lo.
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
838 
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  // Materialise the address of TLS symbol N for the general/local-dynamic TLS
  // models by calling __tls_get_addr on the symbol's GOT slot address.
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // The libcall argument/return is a pointer-sized integer type.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call: a single pointer-sized argument,
  // the GOT slot address computed above.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // The first element of the pair is the call's return value.
  return LowerCallTo(CLI).first;
}
870 
871 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
872                                                    SelectionDAG &DAG) const {
873   SDLoc DL(Op);
874   EVT Ty = Op.getValueType();
875   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
876   int64_t Offset = N->getOffset();
877   MVT XLenVT = Subtarget.getXLenVT();
878 
879   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
880 
881   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
882       CallingConv::GHC)
883     report_fatal_error("In GHC calling convention TLS is not supported");
884 
885   SDValue Addr;
886   switch (Model) {
887   case TLSModel::LocalExec:
888     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
889     break;
890   case TLSModel::InitialExec:
891     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
892     break;
893   case TLSModel::LocalDynamic:
894   case TLSModel::GeneralDynamic:
895     Addr = getDynamicTLSAddr(N, DAG);
896     break;
897   }
898 
899   // In order to maximise the opportunity for common subexpression elimination,
900   // emit a separate ADD node for the global address offset instead of folding
901   // it in the global address node. Later peephole optimisations may choose to
902   // fold it back in when profitable.
903   if (Offset != 0)
904     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
905                        DAG.getConstant(Offset, DL, XLenVT));
906   return Addr;
907 }
908 
909 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
910   SDValue CondV = Op.getOperand(0);
911   SDValue TrueV = Op.getOperand(1);
912   SDValue FalseV = Op.getOperand(2);
913   SDLoc DL(Op);
914   MVT XLenVT = Subtarget.getXLenVT();
915 
916   // If the result type is XLenVT and CondV is the output of a SETCC node
917   // which also operated on XLenVT inputs, then merge the SETCC node into the
918   // lowered RISCVISD::SELECT_CC to take advantage of the integer
919   // compare+branch instructions. i.e.:
920   // (select (setcc lhs, rhs, cc), truev, falsev)
921   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
922   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
923       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
924     SDValue LHS = CondV.getOperand(0);
925     SDValue RHS = CondV.getOperand(1);
926     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
927     ISD::CondCode CCVal = CC->get();
928 
929     normaliseSetCC(LHS, RHS, CCVal);
930 
931     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
932     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
933     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
934   }
935 
936   // Otherwise:
937   // (select condv, truev, falsev)
938   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
939   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
940   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
941 
942   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
943 
944   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
945 }
946 
947 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
948   MachineFunction &MF = DAG.getMachineFunction();
949   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
950 
951   SDLoc DL(Op);
952   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
953                                  getPointerTy(MF.getDataLayout()));
954 
955   // vastart just stores the address of the VarArgsFrameIndex slot into the
956   // memory location argument.
957   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
958   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
959                       MachinePointerInfo(SV));
960 }
961 
962 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
963                                             SelectionDAG &DAG) const {
964   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
965   MachineFunction &MF = DAG.getMachineFunction();
966   MachineFrameInfo &MFI = MF.getFrameInfo();
967   MFI.setFrameAddressIsTaken(true);
968   Register FrameReg = RI.getFrameRegister(MF);
969   int XLenInBytes = Subtarget.getXLen() / 8;
970 
971   EVT VT = Op.getValueType();
972   SDLoc DL(Op);
973   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
974   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
975   while (Depth--) {
976     int Offset = -(XLenInBytes * 2);
977     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
978                               DAG.getIntPtrConstant(Offset, DL));
979     FrameAddr =
980         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
981   }
982   return FrameAddr;
983 }
984 
985 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
986                                              SelectionDAG &DAG) const {
987   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
988   MachineFunction &MF = DAG.getMachineFunction();
989   MachineFrameInfo &MFI = MF.getFrameInfo();
990   MFI.setReturnAddressIsTaken(true);
991   MVT XLenVT = Subtarget.getXLenVT();
992   int XLenInBytes = Subtarget.getXLen() / 8;
993 
994   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
995     return SDValue();
996 
997   EVT VT = Op.getValueType();
998   SDLoc DL(Op);
999   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1000   if (Depth) {
1001     int Off = -XLenInBytes;
1002     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1003     SDValue Offset = DAG.getConstant(Off, DL, VT);
1004     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1005                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1006                        MachinePointerInfo());
1007   }
1008 
1009   // Return the value of the return address register, marking it an implicit
1010   // live-in.
1011   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1012   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1013 }
1014 
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Expand a double-XLEN left shift (Lo/Hi halves plus a runtime shift
  // amount) into single-XLEN operations and selects.
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  // The carry-in from Lo is computed as (Lo >>u 1) >>u (XLEN-1 - Shamt) so
  // that the effective shift amount stays within [0, XLEN-1] even for
  // Shamt == 0 (a single XLEN-bit shift would be out of range).
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  // Select between the Shamt < XLEN and Shamt >= XLEN variants.
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
1053 
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  // Expand a double-XLEN right shift (arithmetic when IsSRA, else logical)
  // into single-XLEN operations and selects.
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  // The carry-in from Hi is computed as (Hi << 1) << (XLEN-1 - Shamt) so the
  // effective shift amount stays within [0, XLEN-1] even for Shamt == 0.
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  // For SRA the high word becomes all sign bits; for SRL it becomes zero.
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  // Select between the Shamt < XLEN and Shamt >= XLEN variants.
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
1105 
1106 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1107 // illegal (currently only vXi64 RV32).
1108 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1109 // them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR lowering");
  SDValue SplatVal = Op.getOperand(0);

  // If we can prove that the value is a sign-extended 32-bit value, lower this
  // as a custom node in order to try and match RVV vector/scalar instructions.
  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
    if (isInt<32>(CVal->getSExtValue()))
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  // vmv.v.x vX, hi
  // vsll.vx vX, vX, /*32*/
  // vmv.v.x vY, lo
  // vsll.vx vY, vY, /*32*/
  // vsrl.vx vY, vY, /*32*/
  // vor.vv vX, vX, vY
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);

  // Splat the low half and clear the upper 32 bits of each element (shift
  // left then logical shift right by 32), undoing any sign extension.
  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  // If the high half is a known zero, the low half alone is the splat.
  if (isNullConstant(Hi))
    return Lo;

  // Splat the high half into the upper 32 bits of each element and combine.
  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}
1152 
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended (to XLEN width), as recorded in the intrinsics table.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          // Rebuild the intrinsic with the widened scalar operand in place.
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in tp (x4).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  }
}
1197 
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Operand 0 is the chain for INTRINSIC_W_CHAIN, so the intrinsic ID sits at
  // index 1 (unlike INTRINSIC_WO_CHAIN, where it is operand 0).
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended (to XLEN width), as recorded in the intrinsics table.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          // Rebuild the intrinsic, preserving the chain via the VT list.
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  // No chained intrinsic needs custom lowering beyond the widening above.
  return SDValue();
}
1233 
1234 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1235 // form of the given Opcode.
1236 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1237   switch (Opcode) {
1238   default:
1239     llvm_unreachable("Unexpected opcode");
1240   case ISD::SHL:
1241     return RISCVISD::SLLW;
1242   case ISD::SRA:
1243     return RISCVISD::SRAW;
1244   case ISD::SRL:
1245     return RISCVISD::SRLW;
1246   case ISD::SDIV:
1247     return RISCVISD::DIVW;
1248   case ISD::UDIV:
1249     return RISCVISD::DIVUW;
1250   case ISD::UREM:
1251     return RISCVISD::REMUW;
1252   case ISD::ROTL:
1253     return RISCVISD::ROLW;
1254   case ISD::ROTR:
1255     return RISCVISD::RORW;
1256   case RISCVISD::GREVI:
1257     return RISCVISD::GREVIW;
1258   case RISCVISD::GORCI:
1259     return RISCVISD::GORCIW;
1260   }
1261 }
1262 
// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later on, because the fact that the operation was originally of
// type i32 is lost.
1268 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
1269   SDLoc DL(N);
1270   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1271   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1272   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1273   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1274   // ReplaceNodeResults requires we maintain the same type for the return value.
1275   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1276 }
1277 
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, reducing the number of sign-extension instructions needed.
1280 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1281   SDLoc DL(N);
1282   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1283   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1284   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1285   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1286                                DAG.getValueType(MVT::i32));
1287   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1288 }
1289 
// Replace the illegally-typed results of node N with legally-typed
// replacement values, as requested by the generic type legalizer for
// operations marked Custom.
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // For strict nodes operand 0 is the chain; the FP value follows it.
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    // Read the 64-bit counter as two i32 halves plus a chain, then combine
    // the halves into the i64 result the caller expects.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // NOTE(review): a constant RHS is left to default legalization —
    // presumably existing patterns handle that case; confirm before changing.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant shift amounts are likewise left to default legalization.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    // Divisions with a constant operand are left to default legalization.
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    // fp->int bitcasts: f16->i16 with Zfh, or f32->i32 on RV64 with F.
    assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             Subtarget.hasStdExtF()) ||
            (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
           "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::f16)
        return;
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::f32)
        return;
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    }
    break;
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // This is similar to customLegalizeToWOp, except that we pass the second
    // operand (a TargetConstant) straight through: it is already of type
    // XLenVT.
    SDLoc DL(N);
    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Both are special cases of GREVI: bit-reverse is GREVI 31, byte-swap of
    // an i32 is GREVI 24.
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                 N->getOperand(0));
    unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
    SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
                                 DAG.getTargetConstant(Imm, DL,
                                                       Subtarget.getXLenVT()));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewOp2 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
    // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
    // Mask the shift amount to 5 bits.
    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
                         DAG.getConstant(0x1f, DL, MVT::i64));
    unsigned Opc =
        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::riscv_vmv_x_s: {
      // Perform the extraction at XLEN width, then truncate to the narrower
      // requested element type.
      EVT VT = N->getValueType(0);
      assert((VT == MVT::i8 || VT == MVT::i16 ||
              (Subtarget.is64Bit() && VT == MVT::i32)) &&
             "Unexpected custom legalisation!");
      SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                    Subtarget.getXLenVT(), N->getOperand(1));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
      break;
    }
    }
    break;
  }
  }
}
1473 
1474 // A structure to hold one of the bit-manipulation patterns below. Together, a
1475 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1476 //   (or (and (shl x, 1), 0xAAAAAAAA),
1477 //       (and (srl x, 1), 0x55555555))
1478 struct RISCVBitmanipPat {
1479   SDValue Op;
1480   unsigned ShAmt;
1481   bool IsSHL;
1482 
1483   bool formsPairWith(const RISCVBitmanipPat &Other) const {
1484     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1485   }
1486 };
1487 
1488 // Matches any of the following bit-manipulation patterns:
1489 //   (and (shl x, 1), (0x55555555 << 1))
1490 //   (and (srl x, 1), 0x55555555)
1491 //   (shl (and x, 0x55555555), 1)
1492 //   (srl (and x, (0x55555555 << 1)), 1)
1493 // where the shift amount and mask may vary thus:
1494 //   [1]  = 0x55555555 / 0xAAAAAAAA
1495 //   [2]  = 0x33333333 / 0xCCCCCCCC
1496 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
1497 //   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
1499 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  // The (possibly masked) operation must be a logical shift by a constant.
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  auto ShAmt = Op.getConstantOperandVal(1);

  // Each bitmanip stage swaps groups of 1, 2, 4, ... bits, so only
  // power-of-2 shift amounts can correspond to a stage.
  if (!isPowerOf2_64(ShAmt))
    return None;

  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
  };

  // A shift of 2^k selects the k'th mask; larger shifts have no mask entry.
  unsigned MaskIdx = Log2_64(ShAmt);
  if (MaskIdx >= array_lengthof(BitmanipMasks))
    return None;

  auto Src = Op.getOperand(0);

  // Truncate the expected mask down to the operation's width (i32 or i64).
  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  // The mask must match exactly; a superset/subset mask is not a valid stage.
  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
1567 
1568 // Match the following pattern as a GREVI(W) operation
1569 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
1570 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1571                                const RISCVSubtarget &Subtarget) {
1572   EVT VT = Op.getValueType();
1573 
1574   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1575     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1576     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1577     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1578       SDLoc DL(Op);
1579       return DAG.getNode(
1580           RISCVISD::GREVI, DL, VT, LHS->Op,
1581           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1582     }
1583   }
1584   return SDValue();
1585 }
1586 
// Matches any of the following patterns as a GORCI(W) operation
1588 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
1589 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
1590 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
1591 // Note that with the variant of 3.,
1592 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
1593 // the inner pattern will first be matched as GREVI and then the outer
1594 // pattern will be matched to GORC via the first rule above.
1595 // 4.  (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  EVT VT = Op.getValueType();

  // Only XLEN-wide values, or i32 on RV64, can form a GORCI(W).
  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Handles rules 1, 2 and 4 above: Reverse is the candidate GREVI or
    // rotate node, X the plain source. Returns SDValue() on no match.
    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(
              RISCVISD::GORCI, DL, VT, X,
              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // Rule 3 from here on: (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)).
    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchRISCVBitmanipPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchRISCVBitmanipPat(OrOp0);
    }
    auto RHS = matchRISCVBitmanipPat(Op1);
    // The two shifted halves must pair up and the remaining inner-OR operand
    // must be the unshifted source itself.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}
1652 
1653 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
1654 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
1655 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
1656 // not undo itself, but they are redundant.
1657 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
1658   unsigned ShAmt1 = N->getConstantOperandVal(1);
1659   SDValue Src = N->getOperand(0);
1660 
1661   if (Src.getOpcode() != N->getOpcode())
1662     return SDValue();
1663 
1664   unsigned ShAmt2 = Src.getConstantOperandVal(1);
1665   Src = Src.getOperand(0);
1666 
1667   unsigned CombinedShAmt;
1668   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
1669     CombinedShAmt = ShAmt1 | ShAmt2;
1670   else
1671     CombinedShAmt = ShAmt1 ^ ShAmt2;
1672 
1673   if (CombinedShAmt == 0)
1674     return Src;
1675 
1676   SDLoc DL(N);
1677   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
1678                      DAG.getTargetConstant(CombinedShAmt, DL,
1679                                            N->getOperand(1).getValueType()));
1680 }
1681 
// Target-specific DAG combines: fold redundant SplitF64/FMV conversions,
// shrink *W nodes via demanded bits, and recognise GREVI/GORCI idioms.
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    // Split the FNEG/FABS operand instead and apply the sign-bit arithmetic
    // to the high (sign-carrying) half only.
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // An operand was simplified in place: return N itself so the combiner
      // revisits it (unless it was deleted as part of the simplification).
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // Also try folding nested GREVIW/GORCIW stages into one.
    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // Sign-extend the f32 sign mask so the upper (anyext) bits stay benign.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI:
    return combineGREVI_GORCI(N, DCI.DAG);
  case ISD::OR:
    // Recognise the OR-of-shifts idioms for bit permute / bit OR-combine.
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    break;
  }

  return SDValue();
}
1823 
1824 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1825     const SDNode *N, CombineLevel Level) const {
1826   // The following folds are only desirable if `(OP _, c1 << c2)` can be
1827   // materialised in fewer instructions than `(OP _, c1)`:
1828   //
1829   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1830   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1831   SDValue N0 = N->getOperand(0);
1832   EVT Ty = N0.getValueType();
1833   if (Ty.isScalarInteger() &&
1834       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
1835     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1836     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1837     if (C1 && C2) {
1838       APInt C1Int = C1->getAPIntValue();
1839       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
1840 
1841       // We can materialise `c1 << c2` into an add immediate, so it's "free",
1842       // and the combine should happen, to potentially allow further combines
1843       // later.
1844       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
1845           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
1846         return true;
1847 
1848       // We can materialise `c1` in an add immediate, so it's "free", and the
1849       // combine should be prevented.
1850       if (C1Int.getMinSignedBits() <= 64 &&
1851           isLegalAddImmediate(C1Int.getSExtValue()))
1852         return false;
1853 
1854       // Neither constant will fit into an immediate, so find materialisation
1855       // costs.
1856       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
1857                                               Subtarget.is64Bit());
1858       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
1859           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
1860 
1861       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
1862       // combine should be prevented.
1863       if (C1Cost < ShiftedC1Cost)
1864         return false;
1865     }
1866   }
1867   return true;
1868 }
1869 
// Reports a lower bound on the number of sign bits for RISCV-specific nodes
// so generic DAG combines can exploit the sign-extended *W results.
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    // 64-bit result, sign-extended from bit 31: bits 63..31 all match.
    return 33;
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, substracting its width from the
    // XLEN, and then adding one (sign bit within the element type).
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  // Default: only the sign bit itself is known.
  return 1;
}
1901 
// Expands ReadCycleWide into a retry loop that reads the 64-bit cycle CSR
// pair coherently on RV32. Returns the block where codegen should continue.
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // The retry loop goes in its own block so the backwards BNE can target it.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1 = x0 reads the CSR without modifying it.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry when the two high-word reads disagree (the counter wrapped).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
1963 
// Expands SplitF64Pseudo: transfers a 64-bit FPR into two 32-bit GPRs by
// spilling the double to a stack slot and reloading the low/high words.
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  // Operands: 0 = lo result GPR, 1 = hi result GPR, 2 = f64 source.
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // One frame index is shared by all F64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  // Reload the two halves: lo word at offset 0, hi word at offset 4.
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
1996 
// Expands BuildPairF64Pseudo: assembles a 64-bit FPR from two 32-bit GPRs
// by storing the halves to a stack slot and reloading them as a double.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  // Operands: 0 = f64 result, 1 = lo word GPR, 2 = hi word GPR.
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // One frame index is shared by all F64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  // Store the two halves: lo word at offset 0, hi word at offset 4.
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
2031 
2032 static bool isSelectPseudo(MachineInstr &MI) {
2033   switch (MI.getOpcode()) {
2034   default:
2035     return false;
2036   case RISCV::Select_GPR_Using_CC_GPR:
2037   case RISCV::Select_FPR16_Using_CC_GPR:
2038   case RISCV::Select_FPR32_Using_CC_GPR:
2039   case RISCV::Select_FPR64_Using_CC_GPR:
2040     return true;
2041   }
2042 }
2043 
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  // Select_* operands: 0 = dest, 1 = LHS, 2 = RHS, 3 = CC imm,
  // 4 = TrueV, 5 = FalseV.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI collecting selects that can share this branch,
  // stopping at the first instruction that makes sharing unsafe.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    // Grab the next iterator first: erasing SelectMBBI invalidates it.
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // New PHIs were introduced after the function left SSA-construction.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
2166 
// Inserts a PseudoVSETVLI before the vector pseudo MI to establish the
// VL/VTYPE configuration it needs, then clears MI's now-redundant VL/SEW
// operands. Returns BB unchanged (no control flow is introduced).
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                    int VLIndex, unsigned SEWIndex,
                                    RISCVVLMUL VLMul, bool WritesElement0) {
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  unsigned SEW = MI.getOperand(SEWIndex).getImm();
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  // Encode the element width as log2(SEW/8) for the vtype field.
  RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));

  MachineRegisterInfo &MRI = MF.getRegInfo();

  // VL and VTYPE are alive here.
  MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));

  if (VLIndex >= 0) {
    // Set VL (rs1 != X0).
    Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    MIB.addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(MI.getOperand(VLIndex).getReg());
  } else
    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
    MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill);

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0 like
  // vmv.s.x or reductions so use agnostic there to match the common case.
  // FIXME: This is conservatively correct, but we might want to detect that
  // the input is undefined.
  bool TailAgnostic = true;
  if (MI.isRegTiedToUseOperand(0) && !WritesElement0)
    TailAgnostic = false;

  // For simplicity we reuse the vtype representation here.
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  MI.getOperand(SEWIndex).setImm(-1);
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}
2217 
2218 MachineBasicBlock *
2219 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2220                                                  MachineBasicBlock *BB) const {
2221   uint64_t TSFlags = MI.getDesc().TSFlags;
2222 
2223   if (TSFlags & RISCVII::HasSEWOpMask) {
2224     unsigned NumOperands = MI.getNumExplicitOperands();
2225     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
2226     unsigned SEWIndex = NumOperands - 1;
2227     bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask;
2228 
2229     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
2230                                                RISCVII::VLMulShift);
2231     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0);
2232   }
2233 
2234   switch (MI.getOpcode()) {
2235   default:
2236     llvm_unreachable("Unexpected instr type to insert");
2237   case RISCV::ReadCycleWide:
2238     assert(!Subtarget.is64Bit() &&
2239            "ReadCycleWrite is only to be used on riscv32");
2240     return emitReadCycleWidePseudo(MI, BB);
2241   case RISCV::Select_GPR_Using_CC_GPR:
2242   case RISCV::Select_FPR16_Using_CC_GPR:
2243   case RISCV::Select_FPR32_Using_CC_GPR:
2244   case RISCV::Select_FPR64_Using_CC_GPR:
2245     return emitSelectPseudo(MI, BB);
2246   case RISCV::BuildPairF64Pseudo:
2247     return emitBuildPairF64Pseudo(MI, BB);
2248   case RISCV::SplitF64Pseudo:
2249     return emitSplitF64Pseudo(MI, BB);
2250   }
2251 }
2252 
2253 // Calling Convention Implementation.
2254 // The expectations for frontend ABI lowering vary from target to target.
2255 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
2256 // details, but this is a longer term goal. For now, we simply try to keep the
2257 // role of the frontend as simple and well-defined as possible. The rules can
2258 // be summarised as:
2259 // * Never split up large scalar arguments. We handle them here.
2260 // * If a hardfloat calling convention is being used, and the struct may be
2261 // passed in a pair of registers (fp+fp, int+fp), and both registers are
2262 // available, then pass as two separate arguments. If either the GPRs or FPRs
2263 // are exhausted, then pass according to the rule below.
2264 // * If a struct could never be passed in registers or directly in a stack
2265 // slot (as it is larger than 2*XLEN and the floating point rules don't
2266 // apply), then pass it using a pointer with the byval attribute.
2267 // * If a struct is less than 2*XLEN, then coerce to either a two-element
2268 // word-sized array or a 2*XLEN scalar (depending on alignment).
2269 // * The frontend can determine whether a struct is returned by reference or
2270 // not based on its size and fields. If it will be returned by reference, the
2271 // frontend must modify the prototype so a pointer with the sret annotation is
2272 // passed as the first argument. This is not necessary for large scalar
2273 // returns.
2274 // * Struct return values and varargs should be coerced to structs containing
2275 // register-size fields in the same situations they would be for fixed
2276 // arguments.
2277 
// GPR argument registers a0-a7 (x10-x17).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// FPR argument registers fa0-fa7 viewed as f16 sub-registers.
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
// FPR argument registers fa0-fa7 viewed as f32 sub-registers.
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
// FPR argument registers fa0-fa7 viewed as full f64 registers.
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
  RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, RISCV::V20,
  RISCV::V21, RISCV::V22, RISCV::V23
};
// LMUL=2/4/8 register groups overlapping the same V16-V23 range.
static const MCPhysReg ArgVRM2s[] = {
  RISCV::V16M2, RISCV::V18M2, RISCV::V20M2, RISCV::V22M2
};
static const MCPhysReg ArgVRM4s[] = {RISCV::V16M4, RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V16M8};
2304 
2305 // Pass a 2*XLEN argument that has been split into two XLEN values through
2306 // registers or the stack as necessary.
2307 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
2308                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
2309                                 MVT ValVT2, MVT LocVT2,
2310                                 ISD::ArgFlagsTy ArgFlags2) {
2311   unsigned XLenInBytes = XLen / 8;
2312   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2313     // At least one half can be passed via register.
2314     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
2315                                      VA1.getLocVT(), CCValAssign::Full));
2316   } else {
2317     // Both halves must be passed on the stack, with proper alignment.
2318     Align StackAlign =
2319         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
2320     State.addLoc(
2321         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
2322                             State.AllocateStack(XLenInBytes, StackAlign),
2323                             VA1.getLocVT(), CCValAssign::Full));
2324     State.addLoc(CCValAssign::getMem(
2325         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2326         LocVT2, CCValAssign::Full));
2327     return false;
2328   }
2329 
2330   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2331     // The second half can also be passed via register.
2332     State.addLoc(
2333         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
2334   } else {
2335     // The second half is passed via the stack, without additional alignment.
2336     State.addLoc(CCValAssign::getMem(
2337         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2338         LocVT2, CCValAssign::Full));
2339   }
2340 
2341   return false;
2342 }
2343 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// IsFixed distinguishes fixed arguments from variadic ones (it affects FPR
// usage and the even-register alignment rule), IsRet is true when assigning
// return values, and OrigTy (when non-null) is the original IR type of the
// argument. FirstMaskArgument identifies the argument (if any) that should
// be pre-assigned to V0.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  // Decide which value types may use FPRs based on the target ABI.
  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other, so checking one class for
  // exhaustion covers them all.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // FP values routed through GPRs are passed as their bit pattern.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs at all: the whole f64 lives in an 8-byte stack slot.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Low half in Reg; reserve either a second GPR or a 4-byte stack slot
    // for the high half (the lowering code reconstructs the pairing).
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isScalableVector()) {
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      // No vector register available: pass the vector indirectly.
      LocInfo = CCValAssign::Indirect;
      // Try using a GPR to pass the address
      Reg = State.AllocateReg(ArgGPRs);
      LocVT = XLenVT;
    }
  } else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All pending parts share the location (register or stack slot) that
    // holds the pointer to the actual value.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
         "Expected an XLenVT or scalable vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
2550 
2551 template <typename ArgTy>
2552 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
2553   for (const auto &ArgIdx : enumerate(Args)) {
2554     MVT ArgVT = ArgIdx.value().VT;
2555     if (ArgVT.isScalableVector() &&
2556         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
2557       return ArgIdx.index();
2558   }
2559   return None;
2560 }
2561 
2562 void RISCVTargetLowering::analyzeInputArgs(
2563     MachineFunction &MF, CCState &CCInfo,
2564     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
2565   unsigned NumArgs = Ins.size();
2566   FunctionType *FType = MF.getFunction().getFunctionType();
2567 
2568   Optional<unsigned> FirstMaskArgument;
2569   if (Subtarget.hasStdExtV())
2570     FirstMaskArgument = preAssignMask(Ins);
2571 
2572   for (unsigned i = 0; i != NumArgs; ++i) {
2573     MVT ArgVT = Ins[i].VT;
2574     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
2575 
2576     Type *ArgTy = nullptr;
2577     if (IsRet)
2578       ArgTy = FType->getReturnType();
2579     else if (Ins[i].isOrigArg())
2580       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2581 
2582     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2583     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2584                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
2585                  FirstMaskArgument)) {
2586       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2587                         << EVT(ArgVT).getEVTString() << '\n');
2588       llvm_unreachable(nullptr);
2589     }
2590   }
2591 }
2592 
2593 void RISCVTargetLowering::analyzeOutputArgs(
2594     MachineFunction &MF, CCState &CCInfo,
2595     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2596     CallLoweringInfo *CLI) const {
2597   unsigned NumArgs = Outs.size();
2598 
2599   Optional<unsigned> FirstMaskArgument;
2600   if (Subtarget.hasStdExtV())
2601     FirstMaskArgument = preAssignMask(Outs);
2602 
2603   for (unsigned i = 0; i != NumArgs; i++) {
2604     MVT ArgVT = Outs[i].VT;
2605     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2606     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2607 
2608     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2609     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2610                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
2611                  FirstMaskArgument)) {
2612       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2613                         << EVT(ArgVT).getEVTString() << "\n");
2614       llvm_unreachable(nullptr);
2615     }
2616   }
2617 }
2618 
2619 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2620 // values.
2621 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2622                                    const CCValAssign &VA, const SDLoc &DL) {
2623   switch (VA.getLocInfo()) {
2624   default:
2625     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2626   case CCValAssign::Full:
2627     break;
2628   case CCValAssign::BCvt:
2629     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2630       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
2631     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2632       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
2633     else
2634       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2635     break;
2636   }
2637   return Val;
2638 }
2639 
2640 // The caller is responsible for loading the full value if the argument is
2641 // passed with CCValAssign::Indirect.
2642 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2643                                 const CCValAssign &VA, const SDLoc &DL,
2644                                 const RISCVTargetLowering &TLI) {
2645   MachineFunction &MF = DAG.getMachineFunction();
2646   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2647   EVT LocVT = VA.getLocVT();
2648   SDValue Val;
2649   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2650   Register VReg = RegInfo.createVirtualRegister(RC);
2651   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2652   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2653 
2654   if (VA.getLocInfo() == CCValAssign::Indirect)
2655     return Val;
2656 
2657   return convertLocVTToValVT(DAG, Val, VA, DL);
2658 }
2659 
2660 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2661                                    const CCValAssign &VA, const SDLoc &DL) {
2662   EVT LocVT = VA.getLocVT();
2663 
2664   switch (VA.getLocInfo()) {
2665   default:
2666     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2667   case CCValAssign::Full:
2668     break;
2669   case CCValAssign::BCvt:
2670     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2671       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
2672     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2673       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
2674     else
2675       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2676     break;
2677   }
2678   return Val;
2679 }
2680 
2681 // The caller is responsible for loading the full value if the argument is
2682 // passed with CCValAssign::Indirect.
2683 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2684                                 const CCValAssign &VA, const SDLoc &DL) {
2685   MachineFunction &MF = DAG.getMachineFunction();
2686   MachineFrameInfo &MFI = MF.getFrameInfo();
2687   EVT LocVT = VA.getLocVT();
2688   EVT ValVT = VA.getValVT();
2689   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
2690   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2691                                  VA.getLocMemOffset(), /*Immutable=*/true);
2692   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2693   SDValue Val;
2694 
2695   ISD::LoadExtType ExtType;
2696   switch (VA.getLocInfo()) {
2697   default:
2698     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2699   case CCValAssign::Full:
2700   case CCValAssign::Indirect:
2701   case CCValAssign::BCvt:
2702     ExtType = ISD::NON_EXTLOAD;
2703     break;
2704   }
2705   Val = DAG.getExtLoad(
2706       ExtType, DL, LocVT, Chain, FIN,
2707       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2708   return Val;
2709 }
2710 
// Reconstruct an f64 argument on RV32 when it was passed as two i32 halves
// (soft-float ABI or exhausted FPRs). CC_RISCV may have placed the value
// fully on the stack, fully in a GPR pair, or split between a GPR and the
// stack; all three cases are recognised here.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // The low half always comes from the assigned GPR.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack. X17 (a7) is the last
    // argument GPR, so the high half sits at the first stack slot.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
2747 
// Calling convention for CallingConv::Fast: like the standard convention but
// with more registers available for arguments. FastCC has less than 1%
// performance improvement for some particular benchmark, but theoretically
// it may have benefit for some cases. Returns true if the type is unhandled.
static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // X5 and X6 might be used for save-restore libcall.
    static const MCPhysReg GPRList[] = {
        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
        RISCV::X29, RISCV::X30, RISCV::X31};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // FP values use the argument FPRs first, then temporary FPRs.
  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Registers exhausted: fall back to the stack.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  return true; // CC didn't match.
}
2816 
// Calling convention for the Glasgow Haskell Compiler (CallingConv::GHC):
// every STG virtual register is pinned to a fixed callee-saved register, and
// spilling to the stack is not supported (a fatal error is raised instead).
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // GHC functions never use the stack for arguments.
  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
2860 
2861 // Transform physical registers into virtual registers.
2862 SDValue RISCVTargetLowering::LowerFormalArguments(
2863     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2864     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2865     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2866 
2867   MachineFunction &MF = DAG.getMachineFunction();
2868 
2869   switch (CallConv) {
2870   default:
2871     report_fatal_error("Unsupported calling convention");
2872   case CallingConv::C:
2873   case CallingConv::Fast:
2874     break;
2875   case CallingConv::GHC:
2876     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
2877         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
2878       report_fatal_error(
2879         "GHC calling convention requires the F and D instruction set extensions");
2880   }
2881 
2882   const Function &Func = MF.getFunction();
2883   if (Func.hasFnAttribute("interrupt")) {
2884     if (!Func.arg_empty())
2885       report_fatal_error(
2886         "Functions with the interrupt attribute cannot have arguments!");
2887 
2888     StringRef Kind =
2889       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2890 
2891     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
2892       report_fatal_error(
2893         "Function interrupt attribute argument not supported!");
2894   }
2895 
2896   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2897   MVT XLenVT = Subtarget.getXLenVT();
2898   unsigned XLenInBytes = Subtarget.getXLen() / 8;
2899   // Used with vargs to acumulate store chains.
2900   std::vector<SDValue> OutChains;
2901 
2902   // Assign locations to all of the incoming arguments.
2903   SmallVector<CCValAssign, 16> ArgLocs;
2904   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2905 
2906   if (CallConv == CallingConv::Fast)
2907     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
2908   else if (CallConv == CallingConv::GHC)
2909     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
2910   else
2911     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
2912 
2913   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2914     CCValAssign &VA = ArgLocs[i];
2915     SDValue ArgValue;
2916     // Passing f64 on RV32D with a soft float ABI must be handled as a special
2917     // case.
2918     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
2919       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
2920     else if (VA.isRegLoc())
2921       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2922     else
2923       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2924 
2925     if (VA.getLocInfo() == CCValAssign::Indirect) {
2926       // If the original argument was split and passed by reference (e.g. i128
2927       // on RV32), we need to load all parts of it here (using the same
2928       // address).
2929       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2930                                    MachinePointerInfo()));
2931       unsigned ArgIndex = Ins[i].OrigArgIndex;
2932       assert(Ins[i].PartOffset == 0);
2933       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2934         CCValAssign &PartVA = ArgLocs[i + 1];
2935         unsigned PartOffset = Ins[i + 1].PartOffset;
2936         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2937                                       DAG.getIntPtrConstant(PartOffset, DL));
2938         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2939                                      MachinePointerInfo()));
2940         ++i;
2941       }
2942       continue;
2943     }
2944     InVals.push_back(ArgValue);
2945   }
2946 
2947   if (IsVarArg) {
2948     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
2949     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2950     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
2951     MachineFrameInfo &MFI = MF.getFrameInfo();
2952     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2953     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2954 
2955     // Offset of the first variable argument from stack pointer, and size of
2956     // the vararg save area. For now, the varargs save area is either zero or
2957     // large enough to hold a0-a7.
2958     int VaArgOffset, VarArgsSaveSize;
2959 
2960     // If all registers are allocated, then all varargs must be passed on the
2961     // stack and we don't need to save any argregs.
2962     if (ArgRegs.size() == Idx) {
2963       VaArgOffset = CCInfo.getNextStackOffset();
2964       VarArgsSaveSize = 0;
2965     } else {
2966       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
2967       VaArgOffset = -VarArgsSaveSize;
2968     }
2969 
2970     // Record the frame index of the first variable argument
2971     // which is a value necessary to VASTART.
2972     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2973     RVFI->setVarArgsFrameIndex(FI);
2974 
2975     // If saving an odd number of registers then create an extra stack slot to
2976     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
2978     if (Idx % 2) {
2979       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
2980       VarArgsSaveSize += XLenInBytes;
2981     }
2982 
2983     // Copy the integer registers that may have been used for passing varargs
2984     // to the vararg save area.
2985     for (unsigned I = Idx; I < ArgRegs.size();
2986          ++I, VaArgOffset += XLenInBytes) {
2987       const Register Reg = RegInfo.createVirtualRegister(RC);
2988       RegInfo.addLiveIn(ArgRegs[I], Reg);
2989       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2990       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2991       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2992       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2993                                    MachinePointerInfo::getFixedStack(MF, FI));
2994       cast<StoreSDNode>(Store.getNode())
2995           ->getMemOperand()
2996           ->setValue((Value *)nullptr);
2997       OutChains.push_back(Store);
2998     }
2999     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3000   }
3001 
3002   // All stores are grouped in one node to allow the matching between
3003   // the size of Ins and InVals. This only happens for vararg functions.
3004   if (!OutChains.empty()) {
3005     OutChains.push_back(Chain);
3006     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3007   }
3008 
3009   return Chain;
3010 }
3011 
3012 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3013 /// for tail call optimization.
3014 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
3015 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
3016     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3017     const SmallVector<CCValAssign, 16> &ArgLocs) const {
3018 
3019   auto &Callee = CLI.Callee;
3020   auto CalleeCC = CLI.CallConv;
3021   auto &Outs = CLI.Outs;
3022   auto &Caller = MF.getFunction();
3023   auto CallerCC = Caller.getCallingConv();
3024 
3025   // Exception-handling functions need a special set of instructions to
3026   // indicate a return to the hardware. Tail-calling another function would
3027   // probably break this.
3028   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
3029   // should be expanded as new function attributes are introduced.
3030   if (Caller.hasFnAttribute("interrupt"))
3031     return false;
3032 
3033   // Do not tail call opt if the stack is used to pass parameters.
3034   if (CCInfo.getNextStackOffset() != 0)
3035     return false;
3036 
3037   // Do not tail call opt if any parameters need to be passed indirectly.
3038   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
3039   // passed indirectly. So the address of the value will be passed in a
3040   // register, or if not available, then the address is put on the stack. In
3041   // order to pass indirectly, space on the stack often needs to be allocated
3042   // in order to store the value. In this case the CCInfo.getNextStackOffset()
3043   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
3044   // are passed CCValAssign::Indirect.
3045   for (auto &VA : ArgLocs)
3046     if (VA.getLocInfo() == CCValAssign::Indirect)
3047       return false;
3048 
3049   // Do not tail call opt if either caller or callee uses struct return
3050   // semantics.
3051   auto IsCallerStructRet = Caller.hasStructRetAttr();
3052   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3053   if (IsCallerStructRet || IsCalleeStructRet)
3054     return false;
3055 
3056   // Externally-defined functions with weak linkage should not be
3057   // tail-called. The behaviour of branch instructions in this situation (as
3058   // used for tail calls) is implementation-defined, so we cannot rely on the
3059   // linker replacing the tail call with a return.
3060   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3061     const GlobalValue *GV = G->getGlobal();
3062     if (GV->hasExternalWeakLinkage())
3063       return false;
3064   }
3065 
3066   // The callee has to preserve all registers the caller needs to preserve.
3067   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3068   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3069   if (CalleeCC != CallerCC) {
3070     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3071     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3072       return false;
3073   }
3074 
3075   // Byval parameters hand the function a pointer directly into the stack area
3076   // we want to reuse during a tail call. Working around this *is* possible
3077   // but less efficient and uglier in LowerCall.
3078   for (auto &Arg : Outs)
3079     if (Arg.Flags.isByVal())
3080       return false;
3081 
3082   return true;
3083 }
3084 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes. Handles tail calls (RISCVISD::TAIL), byval
// argument copies, the RV32D soft-float f64-in-GPR-pair special case, and
// indirect (split, larger-than-2*XLEN) arguments.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Fast and GHC have dedicated CC functions; all other conventions go
  // through analyzeOutputArgs, which applies the standard RISC-V CC.
  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  // A call the IR marked musttail that we cannot tail-call is a hard error.
  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    // Copy the byval aggregate into a fresh stack object; the copy (not the
    // caller's original) is what gets passed.
    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // Tail calls reuse the caller's stack frame, so no callseq markers.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves; the low half goes in the assigned
      // GPR, the high half in the next GPR or on the stack.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      // Consume all following parts of the same original argument; each is
      // stored at its PartOffset from the spill slot.
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    // Non-dso-local callees go through the PLT.
    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // A tail call ends the DAG here: RISCVISD::TAIL produces no return values.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // RV32D soft-float ABI: an f64 return comes back in a pair of GPRs
    // (a0/a1); rebuild the f64 with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
3363 
3364 bool RISCVTargetLowering::CanLowerReturn(
3365     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
3366     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3367   SmallVector<CCValAssign, 16> RVLocs;
3368   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3369 
3370   Optional<unsigned> FirstMaskArgument;
3371   if (Subtarget.hasStdExtV())
3372     FirstMaskArgument = preAssignMask(Outs);
3373 
3374   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3375     MVT VT = Outs[i].VT;
3376     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3377     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3378     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
3379                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
3380                  *this, FirstMaskArgument))
3381       return false;
3382   }
3383   return true;
3384 }
3385 
// Lower an IR return into a RISCVISD::RET_FLAG (or URET/SRET/MRET_FLAG for
// interrupt handlers), copying each return value into its convention-assigned
// register(s) first. Handles the RV32D soft-float case where an f64 is
// returned split across a GPR pair.
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      // Split the f64 into two i32 halves and return them in an adjacent
      // GPR pair (low half in the assigned register, high half in the next).
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      // Diagnose if either half of the pair lands in a user-reserved register.
      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Pick uret/sret/mret based on the privilege level named by the
    // attribute; anything other than "user"/"supervisor" means "machine"
    // (the attribute value was validated in LowerFormalArguments).
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
3487 
3488 void RISCVTargetLowering::validateCCReservedRegs(
3489     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
3490     MachineFunction &MF) const {
3491   const Function &F = MF.getFunction();
3492   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
3493 
3494   if (llvm::any_of(Regs, [&STI](auto Reg) {
3495         return STI.isRegisterReservedByUser(Reg.first);
3496       }))
3497     F.getContext().diagnose(DiagnosticInfoUnsupported{
3498         F, "Argument register required, but has been reserved."});
3499 }
3500 
3501 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3502   return CI->isTailCall();
3503 }
3504 
// Return the textual name of a RISCVISD target-specific DAG node opcode, or
// nullptr for opcodes without a registered name (used for DAG dumps).
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
3548 
3549 /// getConstraintType - Given a constraint letter, return the type of
3550 /// constraint it is for this target.
3551 RISCVTargetLowering::ConstraintType
3552 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
3553   if (Constraint.size() == 1) {
3554     switch (Constraint[0]) {
3555     default:
3556       break;
3557     case 'f':
3558       return C_RegisterClass;
3559     case 'I':
3560     case 'J':
3561     case 'K':
3562       return C_Immediate;
3563     case 'A':
3564       return C_Memory;
3565     }
3566   }
3567   return TargetLowering::getConstraintType(Constraint);
3568 }
3569 
3570 std::pair<unsigned, const TargetRegisterClass *>
3571 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3572                                                   StringRef Constraint,
3573                                                   MVT VT) const {
3574   // First, see if this is a constraint that directly corresponds to a
3575   // RISCV register class.
3576   if (Constraint.size() == 1) {
3577     switch (Constraint[0]) {
3578     case 'r':
3579       return std::make_pair(0U, &RISCV::GPRRegClass);
3580     case 'f':
3581       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
3582         return std::make_pair(0U, &RISCV::FPR16RegClass);
3583       if (Subtarget.hasStdExtF() && VT == MVT::f32)
3584         return std::make_pair(0U, &RISCV::FPR32RegClass);
3585       if (Subtarget.hasStdExtD() && VT == MVT::f64)
3586         return std::make_pair(0U, &RISCV::FPR64RegClass);
3587       break;
3588     default:
3589       break;
3590     }
3591   }
3592 
3593   // Clang will correctly decode the usage of register name aliases into their
3594   // official names. However, other frontends like `rustc` do not. This allows
3595   // users of these frontends to use the ABI names for registers in LLVM-style
3596   // register constraints.
3597   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
3598                                .Case("{zero}", RISCV::X0)
3599                                .Case("{ra}", RISCV::X1)
3600                                .Case("{sp}", RISCV::X2)
3601                                .Case("{gp}", RISCV::X3)
3602                                .Case("{tp}", RISCV::X4)
3603                                .Case("{t0}", RISCV::X5)
3604                                .Case("{t1}", RISCV::X6)
3605                                .Case("{t2}", RISCV::X7)
3606                                .Cases("{s0}", "{fp}", RISCV::X8)
3607                                .Case("{s1}", RISCV::X9)
3608                                .Case("{a0}", RISCV::X10)
3609                                .Case("{a1}", RISCV::X11)
3610                                .Case("{a2}", RISCV::X12)
3611                                .Case("{a3}", RISCV::X13)
3612                                .Case("{a4}", RISCV::X14)
3613                                .Case("{a5}", RISCV::X15)
3614                                .Case("{a6}", RISCV::X16)
3615                                .Case("{a7}", RISCV::X17)
3616                                .Case("{s2}", RISCV::X18)
3617                                .Case("{s3}", RISCV::X19)
3618                                .Case("{s4}", RISCV::X20)
3619                                .Case("{s5}", RISCV::X21)
3620                                .Case("{s6}", RISCV::X22)
3621                                .Case("{s7}", RISCV::X23)
3622                                .Case("{s8}", RISCV::X24)
3623                                .Case("{s9}", RISCV::X25)
3624                                .Case("{s10}", RISCV::X26)
3625                                .Case("{s11}", RISCV::X27)
3626                                .Case("{t3}", RISCV::X28)
3627                                .Case("{t4}", RISCV::X29)
3628                                .Case("{t5}", RISCV::X30)
3629                                .Case("{t6}", RISCV::X31)
3630                                .Default(RISCV::NoRegister);
3631   if (XRegFromAlias != RISCV::NoRegister)
3632     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
3633 
3634   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
3635   // TableGen record rather than the AsmName to choose registers for InlineAsm
3636   // constraints, plus we want to match those names to the widest floating point
3637   // register type available, manually select floating point registers here.
3638   //
3639   // The second case is the ABI name of the register, so that frontends can also
3640   // use the ABI names in register constraint lists.
3641   if (Subtarget.hasStdExtF()) {
3642     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
3643                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
3644                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
3645                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
3646                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
3647                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
3648                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
3649                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
3650                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
3651                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
3652                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
3653                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
3654                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
3655                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
3656                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
3657                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
3658                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
3659                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
3660                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
3661                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
3662                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
3663                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
3664                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
3665                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
3666                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
3667                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
3668                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
3669                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
3670                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
3671                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
3672                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
3673                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
3674                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
3675                         .Default(RISCV::NoRegister);
3676     if (FReg != RISCV::NoRegister) {
3677       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
3678       if (Subtarget.hasStdExtD()) {
3679         unsigned RegNo = FReg - RISCV::F0_F;
3680         unsigned DReg = RISCV::F0_D + RegNo;
3681         return std::make_pair(DReg, &RISCV::FPR64RegClass);
3682       }
3683       return std::make_pair(FReg, &RISCV::FPR32RegClass);
3684     }
3685   }
3686 
3687   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3688 }
3689 
3690 unsigned
3691 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3692   // Currently only support length 1 constraints.
3693   if (ConstraintCode.size() == 1) {
3694     switch (ConstraintCode[0]) {
3695     case 'A':
3696       return InlineAsm::Constraint_A;
3697     default:
3698       break;
3699     }
3700   }
3701 
3702   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
3703 }
3704 
3705 void RISCVTargetLowering::LowerAsmOperandForConstraint(
3706     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3707     SelectionDAG &DAG) const {
3708   // Currently only support length 1 constraints.
3709   if (Constraint.length() == 1) {
3710     switch (Constraint[0]) {
3711     case 'I':
3712       // Validate & create a 12-bit signed immediate operand.
3713       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3714         uint64_t CVal = C->getSExtValue();
3715         if (isInt<12>(CVal))
3716           Ops.push_back(
3717               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3718       }
3719       return;
3720     case 'J':
3721       // Validate & create an integer zero operand.
3722       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3723         if (C->getZExtValue() == 0)
3724           Ops.push_back(
3725               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
3726       return;
3727     case 'K':
3728       // Validate & create a 5-bit unsigned immediate operand.
3729       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3730         uint64_t CVal = C->getZExtValue();
3731         if (isUInt<5>(CVal))
3732           Ops.push_back(
3733               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3734       }
3735       return;
3736     default:
3737       break;
3738     }
3739   }
3740   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3741 }
3742 
3743 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
3744                                                    Instruction *Inst,
3745                                                    AtomicOrdering Ord) const {
3746   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
3747     return Builder.CreateFence(Ord);
3748   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
3749     return Builder.CreateFence(AtomicOrdering::Release);
3750   return nullptr;
3751 }
3752 
3753 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
3754                                                     Instruction *Inst,
3755                                                     AtomicOrdering Ord) const {
3756   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
3757     return Builder.CreateFence(AtomicOrdering::Acquire);
3758   return nullptr;
3759 }
3760 
3761 TargetLowering::AtomicExpansionKind
3762 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3763   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
3764   // point operations can't be used in an lr/sc sequence without breaking the
3765   // forward-progress guarantee.
3766   if (AI->isFloatingPointOperation())
3767     return AtomicExpansionKind::CmpXChg;
3768 
3769   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
3770   if (Size == 8 || Size == 16)
3771     return AtomicExpansionKind::MaskedIntrinsic;
3772   return AtomicExpansionKind::None;
3773 }
3774 
3775 static Intrinsic::ID
3776 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
3777   if (XLen == 32) {
3778     switch (BinOp) {
3779     default:
3780       llvm_unreachable("Unexpected AtomicRMW BinOp");
3781     case AtomicRMWInst::Xchg:
3782       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
3783     case AtomicRMWInst::Add:
3784       return Intrinsic::riscv_masked_atomicrmw_add_i32;
3785     case AtomicRMWInst::Sub:
3786       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
3787     case AtomicRMWInst::Nand:
3788       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
3789     case AtomicRMWInst::Max:
3790       return Intrinsic::riscv_masked_atomicrmw_max_i32;
3791     case AtomicRMWInst::Min:
3792       return Intrinsic::riscv_masked_atomicrmw_min_i32;
3793     case AtomicRMWInst::UMax:
3794       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
3795     case AtomicRMWInst::UMin:
3796       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
3797     }
3798   }
3799 
3800   if (XLen == 64) {
3801     switch (BinOp) {
3802     default:
3803       llvm_unreachable("Unexpected AtomicRMW BinOp");
3804     case AtomicRMWInst::Xchg:
3805       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
3806     case AtomicRMWInst::Add:
3807       return Intrinsic::riscv_masked_atomicrmw_add_i64;
3808     case AtomicRMWInst::Sub:
3809       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
3810     case AtomicRMWInst::Nand:
3811       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
3812     case AtomicRMWInst::Max:
3813       return Intrinsic::riscv_masked_atomicrmw_max_i64;
3814     case AtomicRMWInst::Min:
3815       return Intrinsic::riscv_masked_atomicrmw_min_i64;
3816     case AtomicRMWInst::UMax:
3817       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
3818     case AtomicRMWInst::UMin:
3819       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
3820     }
3821   }
3822 
3823   llvm_unreachable("Unexpected XLen\n");
3824 }
3825 
// Lower a sub-word atomicrmw to a call to the matching
// riscv_masked_atomicrmw_* intrinsic (named here after the lr.w/op/sc.w loop
// it is eventually selected to). AlignedAddr is the address of the containing
// aligned word, Mask selects the sub-word's bits within it, and ShiftAmt is
// the bit offset of the value inside the word.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The atomic ordering is passed to the intrinsic as an XLen-wide constant.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // On RV64 the masked intrinsics operate on i64 values, so sign-extend the
  // i32 operands first.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    // Min/Max take an extra operand: the sign-extension shift amount.
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // Truncate the i64 intrinsic result back to the i32 value callers expect.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
3868 
3869 TargetLowering::AtomicExpansionKind
3870 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
3871     AtomicCmpXchgInst *CI) const {
3872   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
3873   if (Size == 8 || Size == 16)
3874     return AtomicExpansionKind::MaskedIntrinsic;
3875   return AtomicExpansionKind::None;
3876 }
3877 
3878 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
3879     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
3880     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
3881   unsigned XLen = Subtarget.getXLen();
3882   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
3883   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
3884   if (XLen == 64) {
3885     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
3886     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
3887     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
3888     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
3889   }
3890   Type *Tys[] = {AlignedAddr->getType()};
3891   Function *MaskedCmpXchg =
3892       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
3893   Value *Result = Builder.CreateCall(
3894       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
3895   if (XLen == 64)
3896     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
3897   return Result;
3898 }
3899 
3900 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3901                                                      EVT VT) const {
3902   VT = VT.getScalarType();
3903 
3904   if (!VT.isSimple())
3905     return false;
3906 
3907   switch (VT.getSimpleVT().SimpleTy) {
3908   case MVT::f16:
3909     return Subtarget.hasStdExtZfh();
3910   case MVT::f32:
3911     return Subtarget.hasStdExtF();
3912   case MVT::f64:
3913     return Subtarget.hasStdExtD();
3914   default:
3915     break;
3916   }
3917 
3918   return false;
3919 }
3920 
// Register into which the exception pointer is passed to a landing pad:
// x10 (ABI name a0).
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
3925 
// Register into which the exception type-id selector is passed to a landing
// pad: x11 (ABI name a1).
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
3930 
3931 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
3932   // Return false to suppress the unnecessary extensions if the LibCall
3933   // arguments or return value is f32 type for LP64 ABI.
3934   RISCVABI::ABI ABI = Subtarget.getTargetABI();
3935   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
3936     return false;
3937 
3938   return true;
3939 }
3940 
3941 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
3942                                                  SDValue C) const {
3943   // Check integral scalar types.
3944   if (VT.isScalarInteger()) {
3945     // Omit the optimization if the sub target has the M extension and the data
3946     // size exceeds XLen.
3947     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
3948       return false;
3949     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3950       // Break the MUL to a SLLI and an ADD/SUB.
3951       const APInt &Imm = ConstNode->getAPIntValue();
3952       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
3953           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
3954         return true;
3955       // Omit the following optimization if the sub target has the M extension
3956       // and the data size >= XLen.
3957       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
3958         return false;
3959       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
3960       // a pair of LUI/ADDI.
3961       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
3962         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
3963         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
3964             (1 - ImmS).isPowerOf2())
3965         return true;
3966       }
3967     }
3968   }
3969 
3970   return false;
3971 }
3972 
3973 #define GET_REGISTER_MATCHER
3974 #include "RISCVGenAsmMatcher.inc"
3975 
// Resolve a register name (as used by e.g. the read_register/write_register
// intrinsics) to a physical register. Both ABI alias names and architectural
// names are accepted via the TableGen-generated matchers.
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  // Try ABI alias names first (e.g. "a0"), then architectural names ("x10").
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  // Only registers reserved by the target or explicitly reserved by the user
  // may be obtained by name; anything else is a fatal error.
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
3991 
namespace llvm {
namespace RISCVVIntrinsicsTable {

// Instantiate the TableGen-generated searchable table of RISC-V vector
// intrinsics (definitions come from RISCVGenSearchableTables.inc).
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable
} // namespace llvm
4000