1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  // Codegen for the reduced-register RV32E base ISA is not implemented.
  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // A hard-float ABI requires the matching FP extension; if it is absent,
  // warn and fall back to the soft-float ABI with the same XLEN.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
                "doesn't support the F instruction set extension (ignoring "
                          "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  // Any ABI not explicitly listed here is unsupported by this lowering.
  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  if (Subtarget.hasStdExtV()) {
    // RVV types: the register class follows the type's LMUL suffix -- VR for
    // mask and LMUL <= 1 types, VRM2/VRM4/VRM8 for register groups of 2/4/8.
    addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass);

    addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);

    // FP vector types are only registered when the matching scalar FP
    // extension is present.
    if (Subtarget.hasStdExtZfh()) {
      addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtF()) {
      addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtD()) {
      addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  // i1 loads are promoted regardless of extension kind.
  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Varargs: va_start is custom-lowered; the rest use the generic expansion.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Zbb provides sext.b/sext.h-style operations; without it, in-register
  // sign extensions of i8/i16 are expanded.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  // On RV64 only XLenVT (i64) is a legal scalar integer type; common i32
  // arithmetic is custom-legalized.
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  // Without the M extension there is no hardware multiply/divide; these are
  // expanded (typically to libcalls).
  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  // Multi-part shifts (for 2*XLen-wide values) are custom-lowered below.
  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  // With Zbp, BSWAP/BITREVERSE are custom-lowered (to GREVI in
  // LowerOperation); otherwise BSWAP is expanded.
  if (Subtarget.hasStdExtZbp()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::BSWAP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  // Zbt funnel shifts: legal at XLen, custom at i32 on RV64.
  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  }

  // Scalar FP condition codes and operations that have no direct hardware
  // support and are expanded for each enabled FP type below.
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  // Address-like nodes are always custom-lowered (see getAddr below).
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Atomics: with the A extension, support up to XLen-wide atomics and a
  // 32-bit minimum cmpxchg width; otherwise fall back to libcalls.
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    }

    for (auto VT : MVT::integer_scalable_vector_valuetypes()) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      // Lower RVV truncates as a series of "RISCVISD::TRUNCATE_VECTOR"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }

    // We must custom-lower SPLAT_VECTOR vXi64 on RV32
    if (!Subtarget.is64Bit())
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    if (Subtarget.hasStdExtZfh()) {
      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtF()) {
      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
                      RISCVVMVTs::vfloat32m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtD()) {
      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }
  }

  // Function alignments. 4 bytes by default, 2 when compressed (C-extension)
  // instructions are available.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  // Zbp combines (e.g. GREVI merging) are driven from a DAG combine on OR.
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}
445 
446 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
447                                             EVT VT) const {
448   if (!VT.isVector())
449     return getPointerTy(DL);
450   if (Subtarget.hasStdExtV())
451     return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
452   return VT.changeVectorElementTypeToInteger();
453 }
454 
455 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
456                                              const CallInst &I,
457                                              MachineFunction &MF,
458                                              unsigned Intrinsic) const {
459   switch (Intrinsic) {
460   default:
461     return false;
462   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
463   case Intrinsic::riscv_masked_atomicrmw_add_i32:
464   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
465   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
466   case Intrinsic::riscv_masked_atomicrmw_max_i32:
467   case Intrinsic::riscv_masked_atomicrmw_min_i32:
468   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
469   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
470   case Intrinsic::riscv_masked_cmpxchg_i32:
471     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
472     Info.opc = ISD::INTRINSIC_W_CHAIN;
473     Info.memVT = MVT::getVT(PtrTy->getElementType());
474     Info.ptrVal = I.getArgOperand(0);
475     Info.offset = 0;
476     Info.align = Align(4);
477     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
478                  MachineMemOperand::MOVolatile;
479     return true;
480   }
481 }
482 
483 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
484                                                 const AddrMode &AM, Type *Ty,
485                                                 unsigned AS,
486                                                 Instruction *I) const {
487   // No global is ever allowed as a base.
488   if (AM.BaseGV)
489     return false;
490 
491   // Require a 12-bit signed offset.
492   if (!isInt<12>(AM.BaseOffs))
493     return false;
494 
495   switch (AM.Scale) {
496   case 0: // "r+i" or just "i", depending on HasBaseReg.
497     break;
498   case 1:
499     if (!AM.HasBaseReg) // allow "r+i".
500       break;
501     return false; // disallow "r+r" or "r+r+i".
502   default:
503     return false;
504   }
505 
506   return true;
507 }
508 
509 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
510   return isInt<12>(Imm);
511 }
512 
513 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
514   return isInt<12>(Imm);
515 }
516 
517 // On RV32, 64-bit integers are split into their high and low parts and held
518 // in two different registers, so the trunc is free since the low register can
519 // just be used.
520 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
521   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
522     return false;
523   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
524   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
525   return (SrcBits == 64 && DestBits == 32);
526 }
527 
528 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
529   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
530       !SrcVT.isInteger() || !DstVT.isInteger())
531     return false;
532   unsigned SrcBits = SrcVT.getSizeInBits();
533   unsigned DestBits = DstVT.getSizeInBits();
534   return (SrcBits == 64 && DestBits == 32);
535 }
536 
537 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
538   // Zexts are free if they can be combined with a load.
539   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
540     EVT MemVT = LD->getMemoryVT();
541     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
542          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
543         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
544          LD->getExtensionType() == ISD::ZEXTLOAD))
545       return true;
546   }
547 
548   return TargetLowering::isZExtFree(Val, VT2);
549 }
550 
551 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
552   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
553 }
554 
// Count-trailing-zeros is only cheap enough to speculate when the Zbb
// extension provides it as a native instruction.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}
558 
// Count-leading-zeros is only cheap enough to speculate when the Zbb
// extension provides it as a native instruction.
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}
562 
563 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
564                                        bool ForCodeSize) const {
565   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
566     return false;
567   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
568     return false;
569   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
570     return false;
571   if (Imm.isNegZero())
572     return false;
573   return Imm.isZero();
574 }
575 
576 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
577   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
578          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
579          (VT == MVT::f64 && Subtarget.hasStdExtD());
580 }
581 
582 // Changes the condition code and swaps operands if necessary, so the SetCC
583 // operation matches one of the comparisons supported directly in the RISC-V
584 // ISA.
585 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
586   switch (CC) {
587   default:
588     break;
589   case ISD::SETGT:
590   case ISD::SETLE:
591   case ISD::SETUGT:
592   case ISD::SETULE:
593     CC = ISD::getSetCCSwappedOperands(CC);
594     std::swap(LHS, RHS);
595     break;
596   }
597 }
598 
599 // Return the RISC-V branch opcode that matches the given DAG integer
600 // condition code. The CondCode must be one of those supported by the RISC-V
601 // ISA (see normaliseSetCC).
602 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
603   switch (CC) {
604   default:
605     llvm_unreachable("Unsupported CondCode");
606   case ISD::SETEQ:
607     return RISCV::BEQ;
608   case ISD::SETNE:
609     return RISCV::BNE;
610   case ISD::SETLT:
611     return RISCV::BLT;
612   case ISD::SETGE:
613     return RISCV::BGE;
614   case ISD::SETULT:
615     return RISCV::BLTU;
616   case ISD::SETUGE:
617     return RISCV::BGEU;
618   }
619 }
620 
// Dispatch for all operations marked Custom in the constructor. Each case
// forwards to a dedicated lower* helper or builds the replacement DAG nodes
// inline; hitting the default case is a backend bug.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    // Integer-to-FP bitcasts that fit a single FP move: i16->f16 (Zfh) and
    // i32->f32 (RV64+F). The integer side is any-extended to XLen first and
    // then moved with the target FMV node. Other bitcast shapes return an
    // empty SDValue so the generic legalizer handles them.
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::TRUNCATE: {
    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower non-mask truncates
    if (!VT.isVector() || VT.getVectorElementType() == MVT::i1)
      return Op;

    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    // Halve the element width each step until the destination width is hit;
    // the element count stays constant throughout.
    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  }
}
739 
// Build the target-specific node for a global address, used by getAddr.
// The offset passed here is 0; presumably any symbol offset is applied by
// the caller (lowerGlobalAddress) -- TODO confirm against that function.
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
744 
// Build the target-specific node for a block address, used by getAddr.
// Unlike the global-address overload, the node's own offset is preserved.
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
750 
// Build the target-specific node for a constant-pool entry, used by getAddr.
// Alignment and offset are carried over from the source node.
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
756 
// Build the target-specific node for a jump table, used by getAddr.
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
761 
// Materialise the address of node N (global address, block address, constant
// pool, or jump table -- any type with a getTargetNode overload above).
// PIC code uses PC-relative sequences: PseudoLLA for symbols known to be
// local, PseudoLA (GOT-indirect) otherwise. Non-PIC code selects a sequence
// based on the code model: lui+addi for Small, PseudoLLA for Medium.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
802 
803 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
804                                                 SelectionDAG &DAG) const {
805   SDLoc DL(Op);
806   EVT Ty = Op.getValueType();
807   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
808   int64_t Offset = N->getOffset();
809   MVT XLenVT = Subtarget.getXLenVT();
810 
811   const GlobalValue *GV = N->getGlobal();
812   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
813   SDValue Addr = getAddr(N, DAG, IsLocal);
814 
815   // In order to maximise the opportunity for common subexpression elimination,
816   // emit a separate ADD node for the global address offset instead of folding
817   // it in the global address node. Later peephole optimisations may choose to
818   // fold it back in when profitable.
819   if (Offset != 0)
820     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
821                        DAG.getConstant(Offset, DL, XLenVT));
822   return Addr;
823 }
824 
825 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
826                                                SelectionDAG &DAG) const {
827   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
828 
829   return getAddr(N, DAG);
830 }
831 
832 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
833                                                SelectionDAG &DAG) const {
834   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
835 
836   return getAddr(N, DAG);
837 }
838 
839 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
840                                             SelectionDAG &DAG) const {
841   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
842 
843   return getAddr(N, DAG);
844 }
845 
// Lower a TLS global under the static TLS models. With UseGOT==true the
// initial-exec sequence is emitted (symbol address loaded from the GOT, then
// the thread pointer is added); with UseGOT==false the local-exec sequence is
// emitted (%tprel_hi/%tprel_add/%tprel_lo arithmetic relative to tp).
// Returns the pointer-typed address of the symbol.
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer (tp, i.e. X4).
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  // lui with the tp-relative hi part, the tp add (PseudoAddTPRel carries the
  // %tprel_add operand), then the lo-part addi.
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
886 
// Lower a TLS global under the general/local-dynamic models: compute the
// symbol's GOT slot address PC-relatively, then call __tls_get_addr with that
// address as the single argument. Returns the call's result value.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // The libcall argument/result is an integer of pointer width.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // LowerCallTo returns {result, chain}; only the result is needed here.
  return LowerCallTo(CLI).first;
}
918 
919 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
920                                                    SelectionDAG &DAG) const {
921   SDLoc DL(Op);
922   EVT Ty = Op.getValueType();
923   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
924   int64_t Offset = N->getOffset();
925   MVT XLenVT = Subtarget.getXLenVT();
926 
927   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
928 
929   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
930       CallingConv::GHC)
931     report_fatal_error("In GHC calling convention TLS is not supported");
932 
933   SDValue Addr;
934   switch (Model) {
935   case TLSModel::LocalExec:
936     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
937     break;
938   case TLSModel::InitialExec:
939     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
940     break;
941   case TLSModel::LocalDynamic:
942   case TLSModel::GeneralDynamic:
943     Addr = getDynamicTLSAddr(N, DAG);
944     break;
945   }
946 
947   // In order to maximise the opportunity for common subexpression elimination,
948   // emit a separate ADD node for the global address offset instead of folding
949   // it in the global address node. Later peephole optimisations may choose to
950   // fold it back in when profitable.
951   if (Offset != 0)
952     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
953                        DAG.getConstant(Offset, DL, XLenVT));
954   return Addr;
955 }
956 
957 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
958   SDValue CondV = Op.getOperand(0);
959   SDValue TrueV = Op.getOperand(1);
960   SDValue FalseV = Op.getOperand(2);
961   SDLoc DL(Op);
962   MVT XLenVT = Subtarget.getXLenVT();
963 
964   // If the result type is XLenVT and CondV is the output of a SETCC node
965   // which also operated on XLenVT inputs, then merge the SETCC node into the
966   // lowered RISCVISD::SELECT_CC to take advantage of the integer
967   // compare+branch instructions. i.e.:
968   // (select (setcc lhs, rhs, cc), truev, falsev)
969   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
970   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
971       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
972     SDValue LHS = CondV.getOperand(0);
973     SDValue RHS = CondV.getOperand(1);
974     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
975     ISD::CondCode CCVal = CC->get();
976 
977     normaliseSetCC(LHS, RHS, CCVal);
978 
979     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
980     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
981     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
982   }
983 
984   // Otherwise:
985   // (select condv, truev, falsev)
986   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
987   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
988   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
989 
990   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
991 
992   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
993 }
994 
995 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
996   MachineFunction &MF = DAG.getMachineFunction();
997   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
998 
999   SDLoc DL(Op);
1000   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1001                                  getPointerTy(MF.getDataLayout()));
1002 
1003   // vastart just stores the address of the VarArgsFrameIndex slot into the
1004   // memory location argument.
1005   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1006   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1007                       MachinePointerInfo(SV));
1008 }
1009 
1010 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1011                                             SelectionDAG &DAG) const {
1012   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1013   MachineFunction &MF = DAG.getMachineFunction();
1014   MachineFrameInfo &MFI = MF.getFrameInfo();
1015   MFI.setFrameAddressIsTaken(true);
1016   Register FrameReg = RI.getFrameRegister(MF);
1017   int XLenInBytes = Subtarget.getXLen() / 8;
1018 
1019   EVT VT = Op.getValueType();
1020   SDLoc DL(Op);
1021   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1022   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1023   while (Depth--) {
1024     int Offset = -(XLenInBytes * 2);
1025     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1026                               DAG.getIntPtrConstant(Offset, DL));
1027     FrameAddr =
1028         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1029   }
1030   return FrameAddr;
1031 }
1032 
1033 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1034                                              SelectionDAG &DAG) const {
1035   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1036   MachineFunction &MF = DAG.getMachineFunction();
1037   MachineFrameInfo &MFI = MF.getFrameInfo();
1038   MFI.setReturnAddressIsTaken(true);
1039   MVT XLenVT = Subtarget.getXLenVT();
1040   int XLenInBytes = Subtarget.getXLen() / 8;
1041 
1042   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1043     return SDValue();
1044 
1045   EVT VT = Op.getValueType();
1046   SDLoc DL(Op);
1047   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1048   if (Depth) {
1049     int Off = -XLenInBytes;
1050     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1051     SDValue Offset = DAG.getConstant(Off, DL, VT);
1052     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1053                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1054                        MachinePointerInfo());
1055   }
1056 
1057   // Return the value of the return address register, marking it an implicit
1058   // live-in.
1059   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1060   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1061 }
1062 
1063 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1064                                                  SelectionDAG &DAG) const {
1065   SDLoc DL(Op);
1066   SDValue Lo = Op.getOperand(0);
1067   SDValue Hi = Op.getOperand(1);
1068   SDValue Shamt = Op.getOperand(2);
1069   EVT VT = Lo.getValueType();
1070 
1071   // if Shamt-XLEN < 0: // Shamt < XLEN
1072   //   Lo = Lo << Shamt
1073   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1074   // else:
1075   //   Lo = 0
1076   //   Hi = Lo << (Shamt-XLEN)
1077 
1078   SDValue Zero = DAG.getConstant(0, DL, VT);
1079   SDValue One = DAG.getConstant(1, DL, VT);
1080   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1081   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1082   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1083   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1084 
1085   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1086   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1087   SDValue ShiftRightLo =
1088       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1089   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1090   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1091   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1092 
1093   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1094 
1095   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1096   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1097 
1098   SDValue Parts[2] = {Lo, Hi};
1099   return DAG.getMergeValues(Parts, DL);
1100 }
1101 
1102 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1103                                                   bool IsSRA) const {
1104   SDLoc DL(Op);
1105   SDValue Lo = Op.getOperand(0);
1106   SDValue Hi = Op.getOperand(1);
1107   SDValue Shamt = Op.getOperand(2);
1108   EVT VT = Lo.getValueType();
1109 
1110   // SRA expansion:
1111   //   if Shamt-XLEN < 0: // Shamt < XLEN
1112   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1113   //     Hi = Hi >>s Shamt
1114   //   else:
1115   //     Lo = Hi >>s (Shamt-XLEN);
1116   //     Hi = Hi >>s (XLEN-1)
1117   //
1118   // SRL expansion:
1119   //   if Shamt-XLEN < 0: // Shamt < XLEN
1120   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1121   //     Hi = Hi >>u Shamt
1122   //   else:
1123   //     Lo = Hi >>u (Shamt-XLEN);
1124   //     Hi = 0;
1125 
1126   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1127 
1128   SDValue Zero = DAG.getConstant(0, DL, VT);
1129   SDValue One = DAG.getConstant(1, DL, VT);
1130   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1131   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1132   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1133   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1134 
1135   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1136   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1137   SDValue ShiftLeftHi =
1138       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1139   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1140   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1141   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1142   SDValue HiFalse =
1143       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1144 
1145   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1146 
1147   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1148   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1149 
1150   SDValue Parts[2] = {Lo, Hi};
1151   return DAG.getMergeValues(Parts, DL);
1152 }
1153 
// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  // Only reached for i64 elements on RV32 (see the note above).
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR lowering");
  SDValue SplatVal = Op.getOperand(0);

  // If we can prove that the value is a sign-extended 32-bit value, lower this
  // as a custom node in order to try and match RVV vector/scalar instructions.
  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
    if (isInt<32>(CVal->getSExtValue()))
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  // vmv.v.x vLo, lo
  // vsll.vx vLo, vLo, /*32*/
  // vsrl.vx vLo, vLo, /*32*/   ; vLo = zero-extended low halves
  // vmv.v.x vHi, hi            ; (skipped entirely when hi is constant zero)
  // vsll.vx vHi, vHi, /*32*/
  // vor.vv  vRes, vLo, vHi
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
  // EXTRACT_ELEMENT with index 0/1 splits the i64 scalar into i32 lo/hi.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);

  // Splat the low half, then clear its sign-extended upper 32 bits by
  // shifting left then logically right by 32.
  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  // If the high half is zero, the OR with the Hi splat would be a no-op.
  if (isNullConstant(Hi))
    return Lo;

  // Splat the high half into the upper 32 bits of each element.
  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}
1200 
1201 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1202                                                      SelectionDAG &DAG) const {
1203   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1204   SDLoc DL(Op);
1205 
1206   if (Subtarget.hasStdExtV()) {
1207     // Some RVV intrinsics may claim that they want an integer operand to be
1208     // extended.
1209     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1210             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1211       if (II->ExtendedOperand) {
1212         assert(II->ExtendedOperand < Op.getNumOperands());
1213         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1214         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1215         EVT OpVT = ScalarOp.getValueType();
1216         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1217             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1218           // If the operand is a constant, sign extend to increase our chances
1219           // of being able to use a .vi instruction. ANY_EXTEND would become a
1220           // a zero extend and the simm5 check in isel would fail.
1221           // FIXME: Should we ignore the upper bits in isel instead?
1222           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1223                                                           : ISD::ANY_EXTEND;
1224           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1225           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1226                              Operands);
1227         }
1228       }
1229     }
1230   }
1231 
1232   switch (IntNo) {
1233   default:
1234     return SDValue();    // Don't custom lower most intrinsics.
1235   case Intrinsic::thread_pointer: {
1236     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1237     return DAG.getRegister(RISCV::X4, PtrVT);
1238   }
1239   case Intrinsic::riscv_vmv_x_s:
1240     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1241     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1242                        Op.getOperand(1));
1243   }
1244 }
1245 
1246 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1247                                                     SelectionDAG &DAG) const {
1248   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1249   SDLoc DL(Op);
1250 
1251   if (Subtarget.hasStdExtV()) {
1252     // Some RVV intrinsics may claim that they want an integer operand to be
1253     // extended.
1254     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1255             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1256       if (II->ExtendedOperand) {
1257         // The operands start from the second argument in INTRINSIC_W_CHAIN.
1258         unsigned ExtendOp = II->ExtendedOperand + 1;
1259         assert(ExtendOp < Op.getNumOperands());
1260         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1261         SDValue &ScalarOp = Operands[ExtendOp];
1262         EVT OpVT = ScalarOp.getValueType();
1263         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1264             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1265           // If the operand is a constant, sign extend to increase our chances
1266           // of being able to use a .vi instruction. ANY_EXTEND would become a
1267           // a zero extend and the simm5 check in isel would fail.
1268           // FIXME: Should we ignore the upper bits in isel instead?
1269           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1270                                                           : ISD::ANY_EXTEND;
1271           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1272           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1273                              Operands);
1274         }
1275       }
1276     }
1277   }
1278 
1279   return SDValue();
1280 }
1281 
1282 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1283 // form of the given Opcode.
1284 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1285   switch (Opcode) {
1286   default:
1287     llvm_unreachable("Unexpected opcode");
1288   case ISD::SHL:
1289     return RISCVISD::SLLW;
1290   case ISD::SRA:
1291     return RISCVISD::SRAW;
1292   case ISD::SRL:
1293     return RISCVISD::SRLW;
1294   case ISD::SDIV:
1295     return RISCVISD::DIVW;
1296   case ISD::UDIV:
1297     return RISCVISD::DIVUW;
1298   case ISD::UREM:
1299     return RISCVISD::REMUW;
1300   case ISD::ROTL:
1301     return RISCVISD::ROLW;
1302   case ISD::ROTR:
1303     return RISCVISD::RORW;
1304   case RISCVISD::GREVI:
1305     return RISCVISD::GREVIW;
1306   case RISCVISD::GORCI:
1307     return RISCVISD::GORCIW;
1308   }
1309 }
1310 
1311 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1312 // Because i32 isn't a legal type for RV64, these operations would otherwise
1313 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1314 // later one because the fact the operation was originally of type i32 is
1315 // lost.
1316 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
1317   SDLoc DL(N);
1318   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1319   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1320   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1321   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1322   // ReplaceNodeResults requires we maintain the same type for the return value.
1323   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1324 }
1325 
1326 // Converts the given 32-bit operation to a i64 operation with signed extension
1327 // semantic to reduce the signed extension instructions.
1328 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1329   SDLoc DL(N);
1330   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1331   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1332   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1333   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1334                                DAG.getValueType(MVT::i32));
1335   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1336 }
1337 
1338 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1339                                              SmallVectorImpl<SDValue> &Results,
1340                                              SelectionDAG &DAG) const {
1341   SDLoc DL(N);
1342   switch (N->getOpcode()) {
1343   default:
1344     llvm_unreachable("Don't know how to custom type legalize this operation!");
1345   case ISD::STRICT_FP_TO_SINT:
1346   case ISD::STRICT_FP_TO_UINT:
1347   case ISD::FP_TO_SINT:
1348   case ISD::FP_TO_UINT: {
1349     bool IsStrict = N->isStrictFPOpcode();
1350     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1351            "Unexpected custom legalisation");
1352     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
1353     // If the FP type needs to be softened, emit a library call using the 'si'
1354     // version. If we left it to default legalization we'd end up with 'di'. If
1355     // the FP type doesn't need to be softened just let generic type
1356     // legalization promote the result type.
1357     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
1358         TargetLowering::TypeSoftenFloat)
1359       return;
1360     RTLIB::Libcall LC;
1361     if (N->getOpcode() == ISD::FP_TO_SINT ||
1362         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
1363       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
1364     else
1365       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
1366     MakeLibCallOptions CallOptions;
1367     EVT OpVT = Op0.getValueType();
1368     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
1369     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
1370     SDValue Result;
1371     std::tie(Result, Chain) =
1372         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
1373     Results.push_back(Result);
1374     if (IsStrict)
1375       Results.push_back(Chain);
1376     break;
1377   }
1378   case ISD::READCYCLECOUNTER: {
1379     assert(!Subtarget.is64Bit() &&
1380            "READCYCLECOUNTER only has custom type legalization on riscv32");
1381 
1382     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
1383     SDValue RCW =
1384         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
1385 
1386     Results.push_back(
1387         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
1388     Results.push_back(RCW.getValue(2));
1389     break;
1390   }
1391   case ISD::ADD:
1392   case ISD::SUB:
1393   case ISD::MUL:
1394     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1395            "Unexpected custom legalisation");
1396     if (N->getOperand(1).getOpcode() == ISD::Constant)
1397       return;
1398     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1399     break;
1400   case ISD::SHL:
1401   case ISD::SRA:
1402   case ISD::SRL:
1403     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1404            "Unexpected custom legalisation");
1405     if (N->getOperand(1).getOpcode() == ISD::Constant)
1406       return;
1407     Results.push_back(customLegalizeToWOp(N, DAG));
1408     break;
1409   case ISD::ROTL:
1410   case ISD::ROTR:
1411     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1412            "Unexpected custom legalisation");
1413     Results.push_back(customLegalizeToWOp(N, DAG));
1414     break;
1415   case ISD::SDIV:
1416   case ISD::UDIV:
1417   case ISD::UREM:
1418     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1419            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
1420     if (N->getOperand(0).getOpcode() == ISD::Constant ||
1421         N->getOperand(1).getOpcode() == ISD::Constant)
1422       return;
1423     Results.push_back(customLegalizeToWOp(N, DAG));
1424     break;
1425   case ISD::BITCAST: {
1426     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1427              Subtarget.hasStdExtF()) ||
1428             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
1429            "Unexpected custom legalisation");
1430     SDValue Op0 = N->getOperand(0);
1431     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
1432       if (Op0.getValueType() != MVT::f16)
1433         return;
1434       SDValue FPConv =
1435           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
1436       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
1437     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1438                Subtarget.hasStdExtF()) {
1439       if (Op0.getValueType() != MVT::f32)
1440         return;
1441       SDValue FPConv =
1442           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
1443       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
1444     }
1445     break;
1446   }
1447   case RISCVISD::GREVI:
1448   case RISCVISD::GORCI: {
1449     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1450            "Unexpected custom legalisation");
1451     // This is similar to customLegalizeToWOp, except that we pass the second
1452     // operand (a TargetConstant) straight through: it is already of type
1453     // XLenVT.
1454     SDLoc DL(N);
1455     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1456     SDValue NewOp0 =
1457         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1458     SDValue NewRes =
1459         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
1460     // ReplaceNodeResults requires we maintain the same type for the return
1461     // value.
1462     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
1463     break;
1464   }
1465   case ISD::BSWAP:
1466   case ISD::BITREVERSE: {
1467     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1468            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1469     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1470                                  N->getOperand(0));
1471     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
1472     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
1473                                  DAG.getTargetConstant(Imm, DL,
1474                                                        Subtarget.getXLenVT()));
1475     // ReplaceNodeResults requires we maintain the same type for the return
1476     // value.
1477     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
1478     break;
1479   }
1480   case ISD::FSHL:
1481   case ISD::FSHR: {
1482     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1483            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
1484     SDValue NewOp0 =
1485         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1486     SDValue NewOp1 =
1487         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1488     SDValue NewOp2 =
1489         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
1490     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
1491     // Mask the shift amount to 5 bits.
1492     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
1493                          DAG.getConstant(0x1f, DL, MVT::i64));
1494     unsigned Opc =
1495         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
1496     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
1497     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
1498     break;
1499   }
1500   case ISD::INTRINSIC_WO_CHAIN: {
1501     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
1502     switch (IntNo) {
1503     default:
1504       llvm_unreachable(
1505           "Don't know how to custom type legalize this intrinsic!");
1506     case Intrinsic::riscv_vmv_x_s: {
1507       EVT VT = N->getValueType(0);
1508       assert((VT == MVT::i8 || VT == MVT::i16 ||
1509               (Subtarget.is64Bit() && VT == MVT::i32)) &&
1510              "Unexpected custom legalisation!");
1511       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
1512                                     Subtarget.getXLenVT(), N->getOperand(1));
1513       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
1514       break;
1515     }
1516     }
1517     break;
1518   }
1519   }
1520 }
1521 
1522 // A structure to hold one of the bit-manipulation patterns below. Together, a
1523 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1524 //   (or (and (shl x, 1), 0xAAAAAAAA),
1525 //       (and (srl x, 1), 0x55555555))
1526 struct RISCVBitmanipPat {
1527   SDValue Op;
1528   unsigned ShAmt;
1529   bool IsSHL;
1530 
1531   bool formsPairWith(const RISCVBitmanipPat &Other) const {
1532     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1533   }
1534 };
1535 
// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x55555555 << 1))
//   (and (srl x, 1), 0x55555555)
//   (shl (and x, 0x55555555), 1)
//   (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x55555555 / 0xAAAAAAAA
//   [2]  = 0x33333333 / 0xCCCCCCCC
//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
//   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
// Returns None on no match; otherwise returns the matched source value, the
// shift amount, and whether the shift was a SHL.
static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  // The shift amount must be a constant power of two (1/2/4/8/16/32).
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  auto ShAmt = Op.getConstantOperandVal(1);

  if (!isPowerOf2_64(ShAmt))
    return None;

  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
  };

  // Index the mask table by log2 of the shift amount: [1]->0, [2]->1, etc.
  unsigned MaskIdx = Log2_64(ShAmt);
  if (MaskIdx >= array_lengthof(BitmanipMasks))
    return None;

  auto Src = Op.getOperand(0);

  // Truncate the table mask down to the operation's width (i32 or i64).
  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  // Reject if the observed mask is not exactly the expected one.
  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
1615 
1616 // Match the following pattern as a GREVI(W) operation
1617 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
1618 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1619                                const RISCVSubtarget &Subtarget) {
1620   EVT VT = Op.getValueType();
1621 
1622   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1623     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1624     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1625     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1626       SDLoc DL(Op);
1627       return DAG.getNode(
1628           RISCVISD::GREVI, DL, VT, LHS->Op,
1629           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1630     }
1631   }
1632   return SDValue();
1633 }
1634 
// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4.  (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  EVT VT = Op.getValueType();

  // GORCI operates on XLEN values; on RV64 the W form also covers i32.
  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Matches rules 1/2/4: (or Reverse, X) where Reverse is a GREVI of X by a
    // power-of-2 amount, or a rotate of X by half the bitwidth (which is the
    // same bit permutation as GREVI by bitwidth/2).
    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(
              RISCVISD::GORCI, DL, VT, X,
              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // Rule 3: the remaining patterns need an inner OR on one side.
    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchRISCVBitmanipPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchRISCVBitmanipPat(OrOp0);
    }
    auto RHS = matchRISCVBitmanipPat(Op1);
    // The SHL/SRL halves must pair up, and the inner OR's other operand must
    // be the untouched source x itself.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}
1700 
1701 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
1702 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
1703 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
1704 // not undo itself, but they are redundant.
1705 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
1706   unsigned ShAmt1 = N->getConstantOperandVal(1);
1707   SDValue Src = N->getOperand(0);
1708 
1709   if (Src.getOpcode() != N->getOpcode())
1710     return SDValue();
1711 
1712   unsigned ShAmt2 = Src.getConstantOperandVal(1);
1713   Src = Src.getOperand(0);
1714 
1715   unsigned CombinedShAmt;
1716   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
1717     CombinedShAmt = ShAmt1 | ShAmt2;
1718   else
1719     CombinedShAmt = ShAmt1 ^ ShAmt2;
1720 
1721   if (CombinedShAmt == 0)
1722     return Src;
1723 
1724   SDLoc DL(N);
1725   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
1726                      DAG.getTargetConstant(CombinedShAmt, DL,
1727                                            N->getOperand(1).getValueType()));
1728 }
1729 
// Target-specific DAG combines run by the generic DAGCombiner. Handles RISCV
// target nodes plus a few generic nodes (ISD::OR) that can be folded into
// bitmanip operations. Returns an SDValue to replace N with, or an empty
// SDValue if no combine applies.
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    // The double's sign bit lives in the high word: FNEG flips it, FABS
    // clears it.
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // An operand was simplified in place; re-queue N (unless it was
      // deleted) and signal that a change happened by returning N itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // Also try to merge adjacent GREVIW/GORCIW stages.
    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // Sign bit of the f32 value sits at bit 31 of the i64, and the upper 32
    // bits are a sign extension of it.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI:
    // Merge adjacent GREVI/GORCI stages where possible.
    return combineGREVI_GORCI(N, DCI.DAG);
  case ISD::OR:
    // Try to recognise bitmanip patterns built out of shifts, masks and ORs.
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    break;
  case RISCVISD::SELECT_CC: {
    // Transform
    // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    // Mask of all bits above bit 0: zero there means X is known 0/1.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if ((CCVal == ISD::SETNE || CCVal == ISD::SETEQ) && isNullConstant(RHS) &&
        LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
        DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
      SDLoc DL(N);
      // Drop the XOR by inverting the condition instead.
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                         {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
                          N->getOperand(4)});
    }
    break;
  }
  }

  return SDValue();
}
1892 
1893 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1894     const SDNode *N, CombineLevel Level) const {
1895   // The following folds are only desirable if `(OP _, c1 << c2)` can be
1896   // materialised in fewer instructions than `(OP _, c1)`:
1897   //
1898   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1899   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1900   SDValue N0 = N->getOperand(0);
1901   EVT Ty = N0.getValueType();
1902   if (Ty.isScalarInteger() &&
1903       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
1904     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1905     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1906     if (C1 && C2) {
1907       APInt C1Int = C1->getAPIntValue();
1908       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
1909 
1910       // We can materialise `c1 << c2` into an add immediate, so it's "free",
1911       // and the combine should happen, to potentially allow further combines
1912       // later.
1913       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
1914           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
1915         return true;
1916 
1917       // We can materialise `c1` in an add immediate, so it's "free", and the
1918       // combine should be prevented.
1919       if (C1Int.getMinSignedBits() <= 64 &&
1920           isLegalAddImmediate(C1Int.getSExtValue()))
1921         return false;
1922 
1923       // Neither constant will fit into an immediate, so find materialisation
1924       // costs.
1925       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
1926                                               Subtarget.is64Bit());
1927       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
1928           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
1929 
1930       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
1931       // combine should be prevented.
1932       if (C1Cost < ShiftedC1Cost)
1933         return false;
1934     }
1935   }
1936   return true;
1937 }
1938 
1939 bool RISCVTargetLowering::targetShrinkDemandedConstant(
1940     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1941     TargetLoweringOpt &TLO) const {
1942   // Delay this optimization as late as possible.
1943   if (!TLO.LegalOps)
1944     return false;
1945 
1946   EVT VT = Op.getValueType();
1947   if (VT.isVector())
1948     return false;
1949 
1950   // Only handle AND for now.
1951   if (Op.getOpcode() != ISD::AND)
1952     return false;
1953 
1954   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1955   if (!C)
1956     return false;
1957 
1958   const APInt &Mask = C->getAPIntValue();
1959 
1960   // Clear all non-demanded bits initially.
1961   APInt ShrunkMask = Mask & DemandedBits;
1962 
1963   // If the shrunk mask fits in sign extended 12 bits, let the target
1964   // independent code apply it.
1965   if (ShrunkMask.isSignedIntN(12))
1966     return false;
1967 
1968   // Try to make a smaller immediate by setting undemanded bits.
1969 
1970   // We need to be able to make a negative number through a combination of mask
1971   // and undemanded bits.
1972   APInt ExpandedMask = Mask | ~DemandedBits;
1973   if (!ExpandedMask.isNegative())
1974     return false;
1975 
1976   // What is the fewest number of bits we need to represent the negative number.
1977   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
1978 
1979   // Try to make a 12 bit negative immediate. If that fails try to make a 32
1980   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
1981   APInt NewMask = ShrunkMask;
1982   if (MinSignedBits <= 12)
1983     NewMask.setBitsFrom(11);
1984   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
1985     NewMask.setBitsFrom(31);
1986   else
1987     return false;
1988 
1989   // Sanity check that our new mask is a subset of the demanded mask.
1990   assert(NewMask.isSubsetOf(ExpandedMask));
1991 
1992   // If we aren't changing the mask, just return true to keep it and prevent
1993   // the caller from optimizing.
1994   if (NewMask == Mask)
1995     return true;
1996 
1997   // Replace the constant with the new mask.
1998   SDLoc DL(Op);
1999   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
2000   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
2001   return TLO.CombineTo(Op, NewOp);
2002 }
2003 
2004 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2005                                                         KnownBits &Known,
2006                                                         const APInt &DemandedElts,
2007                                                         const SelectionDAG &DAG,
2008                                                         unsigned Depth) const {
2009   unsigned Opc = Op.getOpcode();
2010   assert((Opc >= ISD::BUILTIN_OP_END ||
2011           Opc == ISD::INTRINSIC_WO_CHAIN ||
2012           Opc == ISD::INTRINSIC_W_CHAIN ||
2013           Opc == ISD::INTRINSIC_VOID) &&
2014          "Should use MaskedValueIsZero if you don't know whether Op"
2015          " is a target node!");
2016 
2017   Known.resetAll();
2018   switch (Opc) {
2019   default: break;
2020   case RISCVISD::READ_VLENB:
2021     // We assume VLENB is at least 8 bytes.
2022     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
2023     Known.Zero.setLowBits(3);
2024     break;
2025   }
2026 }
2027 
2028 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2029     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
2030     unsigned Depth) const {
2031   switch (Op.getOpcode()) {
2032   default:
2033     break;
2034   case RISCVISD::SLLW:
2035   case RISCVISD::SRAW:
2036   case RISCVISD::SRLW:
2037   case RISCVISD::DIVW:
2038   case RISCVISD::DIVUW:
2039   case RISCVISD::REMUW:
2040   case RISCVISD::ROLW:
2041   case RISCVISD::RORW:
2042   case RISCVISD::GREVIW:
2043   case RISCVISD::GORCIW:
2044   case RISCVISD::FSLW:
2045   case RISCVISD::FSRW:
2046     // TODO: As the result is sign-extended, this is conservatively correct. A
2047     // more precise answer could be calculated for SRAW depending on known
2048     // bits in the shift amount.
2049     return 33;
2050   case RISCVISD::VMV_X_S:
2051     // The number of sign bits of the scalar result is computed by obtaining the
2052     // element type of the input vector operand, substracting its width from the
2053     // XLEN, and then adding one (sign bit within the element type).
2054     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
2055   }
2056 
2057   return 1;
2058 }
2059 
// Expands the ReadCycleWide pseudo (RV32 only) into a retry loop that reads
// the 64-bit cycle CSR as two 32-bit halves. Returns the block that control
// continues in after the expansion (the new "done" block).
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the CSR reads and the retry branch; DoneMBB receives the
  // rest of the original block.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // Read high, low, then high again via CSRRS with x0 (a plain CSR read).
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // If the two high-word reads differ, the low word wrapped mid-read: retry.
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
2121 
// Expands SplitF64Pseudo: moves a 64-bit FPR value into two 32-bit GPRs
// (low word, high word) by storing the FPR to a stack slot and reloading
// the two halves with LW. Returns BB unchanged (the expansion is in-line).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // A single frame index is shared by all F64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  // Spill the source FPR64, then reload its two 32-bit halves.
  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  // Low word at offset 0, high word at offset 4.
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
2154 
// Expands BuildPairF64Pseudo: the inverse of emitSplitF64Pseudo. Combines two
// 32-bit GPRs (low word, high word) into one 64-bit FPR by storing both
// halves to a stack slot with SW and reloading the FPR64. Returns BB
// unchanged (the expansion is in-line).
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // A single frame index is shared by all F64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  // Low word at offset 0, high word at offset 4, then reload as FPR64.
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
2189 
2190 static bool isSelectPseudo(MachineInstr &MI) {
2191   switch (MI.getOpcode()) {
2192   default:
2193     return false;
2194   case RISCV::Select_GPR_Using_CC_GPR:
2195   case RISCV::Select_FPR16_Using_CC_GPR:
2196   case RISCV::Select_FPR32_Using_CC_GPR:
2197   case RISCV::Select_FPR64_Using_CC_GPR:
2198     return true;
2199   }
2200 }
2201 
// Expand a Select_*_Using_CC_GPR pseudo into an explicit branch-and-PHI
// triangle. Returns the block that ends the expanded sequence (TailMBB).
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI, extending the sequence with every select pseudo
  // (and interleaved safe instruction) that can share MI's branch.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // Stop at a select with a different condition, or whose true/false
      // operands depend on the result of an earlier select in the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // Non-select instructions may only stay in the sequence if they have no
      // unmodeled side effects, don't touch memory, and don't read any select
      // result.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch. When the condition holds, control jumps
  // straight to TailMBB, whose PHIs take the true value from HeadMBB.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // We just emitted PHIs, so clear the NoPHIs property of the function.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
2324 
// Insert a PseudoVSETVLI before the vector pseudo MI to establish the VL and
// VTYPE it requires. VLIndex is the operand index holding the requested VL
// register (-1 if the pseudo carries no VL operand), SEWIndex is the operand
// index of the SEW immediate, and VLMul/WritesElement0 come from the pseudo's
// TSFlags. The (now redundant) VL/SEW operands on MI are neutralized.
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                    int VLIndex, unsigned SEWIndex,
                                    RISCVVLMUL VLMul, bool WritesElement0) {
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  unsigned SEW = MI.getOperand(SEWIndex).getImm();
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  // Encode SEW (in bytes) as log2 for the vtype field.
  RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));

  MachineRegisterInfo &MRI = MF.getRegInfo();

  // VL and VTYPE are alive here.
  MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));

  if (VLIndex >= 0) {
    // Set VL (rs1 != X0). The GPR result of vsetvli is not otherwise used, so
    // mark it dead.
    Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    MIB.addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(MI.getOperand(VLIndex).getReg());
  } else
    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
    MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill);

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0 like
  // vmv.s.x or reductions so use agnostic there to match the common case.
  // FIXME: This is conservatively correct, but we might want to detect that
  // the input is undefined.
  bool TailAgnostic = true;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef())
      TailAgnostic = true;
  }

  // For simplicity we reuse the vtype representation here.
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  MI.getOperand(SEWIndex).setImm(-1);
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}
2382 
// Expand pseudo instructions that were marked usesCustomInserter. Vector
// pseudos are recognized via their TSFlags and get a vsetvli inserted;
// everything else dispatches on the opcode.
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  if (TSFlags & RISCVII::HasSEWOpMask) {
    // The SEW immediate is the last explicit operand; the VL operand, when
    // present, immediately precedes it.
    unsigned NumOperands = MI.getNumExplicitOperands();
    int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
    unsigned SEWIndex = NumOperands - 1;
    bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask;

    RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
                                               RISCVII::VLMulShift);
    return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0);
  }

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWrite is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}
2417 
2418 // Calling Convention Implementation.
2419 // The expectations for frontend ABI lowering vary from target to target.
2420 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
2421 // details, but this is a longer term goal. For now, we simply try to keep the
2422 // role of the frontend as simple and well-defined as possible. The rules can
2423 // be summarised as:
2424 // * Never split up large scalar arguments. We handle them here.
2425 // * If a hardfloat calling convention is being used, and the struct may be
2426 // passed in a pair of registers (fp+fp, int+fp), and both registers are
2427 // available, then pass as two separate arguments. If either the GPRs or FPRs
2428 // are exhausted, then pass according to the rule below.
2429 // * If a struct could never be passed in registers or directly in a stack
2430 // slot (as it is larger than 2*XLEN and the floating point rules don't
2431 // apply), then pass it using a pointer with the byval attribute.
2432 // * If a struct is less than 2*XLEN, then coerce to either a two-element
2433 // word-sized array or a 2*XLEN scalar (depending on alignment).
2434 // * The frontend can determine whether a struct is returned by reference or
2435 // not based on its size and fields. If it will be returned by reference, the
2436 // frontend must modify the prototype so a pointer with the sret annotation is
2437 // passed as the first argument. This is not necessary for large scalar
2438 // returns.
2439 // * Struct return values and varargs should be coerced to structs containing
2440 // register-size fields in the same situations they would be for fixed
2441 // arguments.
2442 
// Integer argument registers a0-a7 (X10-X17).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// FP argument registers fa0-fa7 at half, single, and double width. The three
// register sets alias each other (see the FPR allocation check in CC_RISCV).
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
  RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, RISCV::V20,
  RISCV::V21, RISCV::V22, RISCV::V23
};
// Grouped vector argument registers for LMUL=2/4/8; these overlap ArgVRs.
static const MCPhysReg ArgVRM2s[] = {
  RISCV::V16M2, RISCV::V18M2, RISCV::V20M2, RISCV::V22M2
};
static const MCPhysReg ArgVRM4s[] = {RISCV::V16M4, RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V16M8};
2469 
2470 // Pass a 2*XLEN argument that has been split into two XLEN values through
2471 // registers or the stack as necessary.
2472 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
2473                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
2474                                 MVT ValVT2, MVT LocVT2,
2475                                 ISD::ArgFlagsTy ArgFlags2) {
2476   unsigned XLenInBytes = XLen / 8;
2477   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2478     // At least one half can be passed via register.
2479     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
2480                                      VA1.getLocVT(), CCValAssign::Full));
2481   } else {
2482     // Both halves must be passed on the stack, with proper alignment.
2483     Align StackAlign =
2484         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
2485     State.addLoc(
2486         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
2487                             State.AllocateStack(XLenInBytes, StackAlign),
2488                             VA1.getLocVT(), CCValAssign::Full));
2489     State.addLoc(CCValAssign::getMem(
2490         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2491         LocVT2, CCValAssign::Full));
2492     return false;
2493   }
2494 
2495   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2496     // The second half can also be passed via register.
2497     State.addLoc(
2498         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
2499   } else {
2500     // The second half is passed via the stack, without additional alignment.
2501     State.addLoc(CCValAssign::getMem(
2502         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2503         LocVT2, CCValAssign::Full));
2504   }
2505 
2506   return false;
2507 }
2508 
// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other, so if all FPR32s are taken no
  // FPR of any width remains.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // Floats passed in GPRs travel bit-converted inside an XLen-wide integer.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPR at all: the whole f64 lives in an 8-byte stack slot.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve a second GPR for the high half, or a 4-byte stack slot if none
    // remains. Only one CCValAssign is recorded; the lowering code knows the
    // second half follows.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isScalableVector()) {
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      LocInfo = CCValAssign::Indirect;
      // Try using a GPR to pass the address
      Reg = State.AllocateReg(ArgGPRs);
      LocVT = XLenVT;
    }
  } else
    Reg = State.AllocateReg(ArgGPRs);
  // If no register was available, reserve a stack slot now so the pending /
  // direct handling below can use its offset.
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
         "Expected an XLenVT or scalable vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
2715 
2716 template <typename ArgTy>
2717 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
2718   for (const auto &ArgIdx : enumerate(Args)) {
2719     MVT ArgVT = ArgIdx.value().VT;
2720     if (ArgVT.isScalableVector() &&
2721         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
2722       return ArgIdx.index();
2723   }
2724   return None;
2725 }
2726 
2727 void RISCVTargetLowering::analyzeInputArgs(
2728     MachineFunction &MF, CCState &CCInfo,
2729     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
2730   unsigned NumArgs = Ins.size();
2731   FunctionType *FType = MF.getFunction().getFunctionType();
2732 
2733   Optional<unsigned> FirstMaskArgument;
2734   if (Subtarget.hasStdExtV())
2735     FirstMaskArgument = preAssignMask(Ins);
2736 
2737   for (unsigned i = 0; i != NumArgs; ++i) {
2738     MVT ArgVT = Ins[i].VT;
2739     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
2740 
2741     Type *ArgTy = nullptr;
2742     if (IsRet)
2743       ArgTy = FType->getReturnType();
2744     else if (Ins[i].isOrigArg())
2745       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2746 
2747     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2748     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2749                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
2750                  FirstMaskArgument)) {
2751       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2752                         << EVT(ArgVT).getEVTString() << '\n');
2753       llvm_unreachable(nullptr);
2754     }
2755   }
2756 }
2757 
2758 void RISCVTargetLowering::analyzeOutputArgs(
2759     MachineFunction &MF, CCState &CCInfo,
2760     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2761     CallLoweringInfo *CLI) const {
2762   unsigned NumArgs = Outs.size();
2763 
2764   Optional<unsigned> FirstMaskArgument;
2765   if (Subtarget.hasStdExtV())
2766     FirstMaskArgument = preAssignMask(Outs);
2767 
2768   for (unsigned i = 0; i != NumArgs; i++) {
2769     MVT ArgVT = Outs[i].VT;
2770     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2771     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2772 
2773     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2774     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2775                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
2776                  FirstMaskArgument)) {
2777       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2778                         << EVT(ArgVT).getEVTString() << "\n");
2779       llvm_unreachable(nullptr);
2780     }
2781   }
2782 }
2783 
2784 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2785 // values.
2786 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2787                                    const CCValAssign &VA, const SDLoc &DL) {
2788   switch (VA.getLocInfo()) {
2789   default:
2790     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2791   case CCValAssign::Full:
2792     break;
2793   case CCValAssign::BCvt:
2794     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2795       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
2796     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2797       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
2798     else
2799       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2800     break;
2801   }
2802   return Val;
2803 }
2804 
2805 // The caller is responsible for loading the full value if the argument is
2806 // passed with CCValAssign::Indirect.
2807 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2808                                 const CCValAssign &VA, const SDLoc &DL,
2809                                 const RISCVTargetLowering &TLI) {
2810   MachineFunction &MF = DAG.getMachineFunction();
2811   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2812   EVT LocVT = VA.getLocVT();
2813   SDValue Val;
2814   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2815   Register VReg = RegInfo.createVirtualRegister(RC);
2816   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2817   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2818 
2819   if (VA.getLocInfo() == CCValAssign::Indirect)
2820     return Val;
2821 
2822   return convertLocVTToValVT(DAG, Val, VA, DL);
2823 }
2824 
2825 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2826                                    const CCValAssign &VA, const SDLoc &DL) {
2827   EVT LocVT = VA.getLocVT();
2828 
2829   switch (VA.getLocInfo()) {
2830   default:
2831     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2832   case CCValAssign::Full:
2833     break;
2834   case CCValAssign::BCvt:
2835     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2836       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
2837     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2838       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
2839     else
2840       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2841     break;
2842   }
2843   return Val;
2844 }
2845 
2846 // The caller is responsible for loading the full value if the argument is
2847 // passed with CCValAssign::Indirect.
2848 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2849                                 const CCValAssign &VA, const SDLoc &DL) {
2850   MachineFunction &MF = DAG.getMachineFunction();
2851   MachineFrameInfo &MFI = MF.getFrameInfo();
2852   EVT LocVT = VA.getLocVT();
2853   EVT ValVT = VA.getValVT();
2854   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
2855   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2856                                  VA.getLocMemOffset(), /*Immutable=*/true);
2857   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2858   SDValue Val;
2859 
2860   ISD::LoadExtType ExtType;
2861   switch (VA.getLocInfo()) {
2862   default:
2863     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2864   case CCValAssign::Full:
2865   case CCValAssign::Indirect:
2866   case CCValAssign::BCvt:
2867     ExtType = ISD::NON_EXTLOAD;
2868     break;
2869   }
2870   Val = DAG.getExtLoad(
2871       ExtType, DL, LocVT, Chain, FIN,
2872       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2873   return Val;
2874 }
2875 
// Reassemble an incoming f64 argument on RV32 when it was assigned to GPRs
// and/or the stack (soft-float ABI or exhausted FPRs). The value arrives
// either entirely on the stack, in a pair of GPRs, or split between the last
// argument GPR (X17) and the stack.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // Low half always arrives in the assigned GPR.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Combine the two i32 halves into a single f64 value.
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
2912 
// FastCC gives less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have a benefit in some cases.
2915 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
2916                             CCValAssign::LocInfo LocInfo,
2917                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
2918 
2919   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2920     // X5 and X6 might be used for save-restore libcall.
2921     static const MCPhysReg GPRList[] = {
2922         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
2923         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
2924         RISCV::X29, RISCV::X30, RISCV::X31};
2925     if (unsigned Reg = State.AllocateReg(GPRList)) {
2926       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2927       return false;
2928     }
2929   }
2930 
2931   if (LocVT == MVT::f16) {
2932     static const MCPhysReg FPR16List[] = {
2933         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
2934         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
2935         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
2936         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
2937     if (unsigned Reg = State.AllocateReg(FPR16List)) {
2938       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2939       return false;
2940     }
2941   }
2942 
2943   if (LocVT == MVT::f32) {
2944     static const MCPhysReg FPR32List[] = {
2945         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
2946         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
2947         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
2948         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
2949     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2950       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2951       return false;
2952     }
2953   }
2954 
2955   if (LocVT == MVT::f64) {
2956     static const MCPhysReg FPR64List[] = {
2957         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
2958         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
2959         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
2960         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
2961     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2962       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2963       return false;
2964     }
2965   }
2966 
2967   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
2968     unsigned Offset4 = State.AllocateStack(4, Align(4));
2969     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
2970     return false;
2971   }
2972 
2973   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
2974     unsigned Offset5 = State.AllocateStack(8, Align(8));
2975     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
2976     return false;
2977   }
2978 
2979   return true; // CC didn't match.
2980 }
2981 
2982 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2983                          CCValAssign::LocInfo LocInfo,
2984                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
2985 
2986   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2987     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
2988     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
2989     static const MCPhysReg GPRList[] = {
2990         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
2991         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
2992     if (unsigned Reg = State.AllocateReg(GPRList)) {
2993       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2994       return false;
2995     }
2996   }
2997 
2998   if (LocVT == MVT::f32) {
2999     // Pass in STG registers: F1, ..., F6
3000     //                        fs0 ... fs5
3001     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3002                                           RISCV::F18_F, RISCV::F19_F,
3003                                           RISCV::F20_F, RISCV::F21_F};
3004     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3005       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3006       return false;
3007     }
3008   }
3009 
3010   if (LocVT == MVT::f64) {
3011     // Pass in STG registers: D1, ..., D6
3012     //                        fs6 ... fs11
3013     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3014                                           RISCV::F24_D, RISCV::F25_D,
3015                                           RISCV::F26_D, RISCV::F27_D};
3016     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3017       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3018       return false;
3019     }
3020   }
3021 
3022   report_fatal_error("No registers left in GHC calling convention");
3023   return true;
3024 }
3025 
// Transform physical registers into virtual registers.
//
// Lowers incoming formal arguments: assigns each argument a register or stack
// location via the appropriate calling convention, materialises the values as
// SDValues in InVals, and (for varargs) spills unused argument registers to
// the vararg save area.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Only the C, Fast and GHC calling conventions are supported.
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // GHC passes f32/f64 values in fixed FPRs, so both F and D extensions
    // are required.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers take no arguments, and only "user", "supervisor"
    // and "machine" are accepted as the attribute value.
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). ArgValue is the pointer; parts of the same original
      // argument share an OrigArgIndex and are loaded at their PartOffset.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      // Otherwise the save area sits immediately below the incoming stack
      // arguments (negative offsets from the incoming stack pointer).
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR Value so these stores aren't treated as aliasing a
      // specific IR object.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
3176 
3177 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3178 /// for tail call optimization.
3179 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
3180 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
3181     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3182     const SmallVector<CCValAssign, 16> &ArgLocs) const {
3183 
3184   auto &Callee = CLI.Callee;
3185   auto CalleeCC = CLI.CallConv;
3186   auto &Outs = CLI.Outs;
3187   auto &Caller = MF.getFunction();
3188   auto CallerCC = Caller.getCallingConv();
3189 
3190   // Exception-handling functions need a special set of instructions to
3191   // indicate a return to the hardware. Tail-calling another function would
3192   // probably break this.
3193   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
3194   // should be expanded as new function attributes are introduced.
3195   if (Caller.hasFnAttribute("interrupt"))
3196     return false;
3197 
3198   // Do not tail call opt if the stack is used to pass parameters.
3199   if (CCInfo.getNextStackOffset() != 0)
3200     return false;
3201 
3202   // Do not tail call opt if any parameters need to be passed indirectly.
3203   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
3204   // passed indirectly. So the address of the value will be passed in a
3205   // register, or if not available, then the address is put on the stack. In
3206   // order to pass indirectly, space on the stack often needs to be allocated
3207   // in order to store the value. In this case the CCInfo.getNextStackOffset()
3208   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
3209   // are passed CCValAssign::Indirect.
3210   for (auto &VA : ArgLocs)
3211     if (VA.getLocInfo() == CCValAssign::Indirect)
3212       return false;
3213 
3214   // Do not tail call opt if either caller or callee uses struct return
3215   // semantics.
3216   auto IsCallerStructRet = Caller.hasStructRetAttr();
3217   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3218   if (IsCallerStructRet || IsCalleeStructRet)
3219     return false;
3220 
3221   // Externally-defined functions with weak linkage should not be
3222   // tail-called. The behaviour of branch instructions in this situation (as
3223   // used for tail calls) is implementation-defined, so we cannot rely on the
3224   // linker replacing the tail call with a return.
3225   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3226     const GlobalValue *GV = G->getGlobal();
3227     if (GV->hasExternalWeakLinkage())
3228       return false;
3229   }
3230 
3231   // The callee has to preserve all registers the caller needs to preserve.
3232   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3233   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3234   if (CalleeCC != CallerCC) {
3235     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3236     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3237       return false;
3238   }
3239 
3240   // Byval parameters hand the function a pointer directly into the stack area
3241   // we want to reuse during a tail call. Working around this *is* possible
3242   // but less efficient and uglier in LowerCall.
3243   for (auto &Arg : Outs)
3244     if (Arg.Flags.isByVal())
3245       return false;
3246 
3247   return true;
3248 }
3249 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
//
// Handles argument marshalling (registers, stack, byval copies, indirect and
// split-f64 values), tail-call selection, callee address materialisation and
// copying the returned values into InVals.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // Tail calls reuse the caller's stack frame, so no callseq markers.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    // Non-dso-local symbols go through the PLT.
    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    // Tail calls do not return to this function; no callseq_end and no
    // return-value copies are needed.
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // f64 returned on RV32 with a soft float ABI comes back as two i32s in
    // a0/a1; reassemble it with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
3528 
3529 bool RISCVTargetLowering::CanLowerReturn(
3530     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
3531     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3532   SmallVector<CCValAssign, 16> RVLocs;
3533   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3534 
3535   Optional<unsigned> FirstMaskArgument;
3536   if (Subtarget.hasStdExtV())
3537     FirstMaskArgument = preAssignMask(Outs);
3538 
3539   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3540     MVT VT = Outs[i].VT;
3541     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3542     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3543     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
3544                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
3545                  *this, FirstMaskArgument))
3546       return false;
3547   }
3548   return true;
3549 }
3550 
// Lower the outgoing return values: assign them to registers per the return
// convention, emit the CopyToReg glue chain, and select the appropriate
// return node (interrupt handlers use URET/SRET/MRET instead of RET).
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI. The value is
      // split into two i32 halves returned in an even/odd GPR pair.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      // Diagnose (rather than miscompile) if either half's register was
      // reserved by the user.
      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."})ipt;

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select the return opcode by privilege level.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
3652 
3653 void RISCVTargetLowering::validateCCReservedRegs(
3654     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
3655     MachineFunction &MF) const {
3656   const Function &F = MF.getFunction();
3657   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
3658 
3659   if (llvm::any_of(Regs, [&STI](auto Reg) {
3660         return STI.isRegisterReservedByUser(Reg.first);
3661       }))
3662     F.getContext().diagnose(DiagnosticInfoUnsupported{
3663         F, "Argument register required, but has been reserved."});
3664 }
3665 
/// Return true if the call may be emitted as a tail call. Here we only
/// require the IR-level `tail` marker; target-specific eligibility is
/// checked separately when the call is actually lowered.
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
3669 
/// Map a RISCVISD opcode to its textual name, used when printing/dumping
/// selection DAG nodes for debugging.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expands to one `case` per node that returns the stringified enumerator.
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR)
  }
  // clang-format on
  // Unknown opcode: callers fall back to a generic representation.
  return nullptr;
#undef NODE_NAME_CASE
}
3715 
3716 /// getConstraintType - Given a constraint letter, return the type of
3717 /// constraint it is for this target.
3718 RISCVTargetLowering::ConstraintType
3719 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
3720   if (Constraint.size() == 1) {
3721     switch (Constraint[0]) {
3722     default:
3723       break;
3724     case 'f':
3725       return C_RegisterClass;
3726     case 'I':
3727     case 'J':
3728     case 'K':
3729       return C_Immediate;
3730     case 'A':
3731       return C_Memory;
3732     }
3733   }
3734   return TargetLowering::getConstraintType(Constraint);
3735 }
3736 
/// Resolve an inline-asm register constraint to a (register, register class)
/// pair. Handles the generic 'r'/'f' class constraints plus explicit
/// register names given either by ABI alias (e.g. "{sp}", "{fa0}") or by
/// architectural name (e.g. "{f10}").
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // Pick the FP register class matching the value type, gated on the
      // corresponding FP extension being available.
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // If D is available, upgrade the matched 32-bit FP register to the
      // corresponding 64-bit one so the widest class is used.
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
3856 
3857 unsigned
3858 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3859   // Currently only support length 1 constraints.
3860   if (ConstraintCode.size() == 1) {
3861     switch (ConstraintCode[0]) {
3862     case 'A':
3863       return InlineAsm::Constraint_A;
3864     default:
3865       break;
3866     }
3867   }
3868 
3869   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
3870 }
3871 
3872 void RISCVTargetLowering::LowerAsmOperandForConstraint(
3873     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3874     SelectionDAG &DAG) const {
3875   // Currently only support length 1 constraints.
3876   if (Constraint.length() == 1) {
3877     switch (Constraint[0]) {
3878     case 'I':
3879       // Validate & create a 12-bit signed immediate operand.
3880       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3881         uint64_t CVal = C->getSExtValue();
3882         if (isInt<12>(CVal))
3883           Ops.push_back(
3884               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3885       }
3886       return;
3887     case 'J':
3888       // Validate & create an integer zero operand.
3889       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3890         if (C->getZExtValue() == 0)
3891           Ops.push_back(
3892               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
3893       return;
3894     case 'K':
3895       // Validate & create a 5-bit unsigned immediate operand.
3896       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3897         uint64_t CVal = C->getZExtValue();
3898         if (isUInt<5>(CVal))
3899           Ops.push_back(
3900               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3901       }
3902       return;
3903     default:
3904       break;
3905     }
3906   }
3907   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3908 }
3909 
3910 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
3911                                                    Instruction *Inst,
3912                                                    AtomicOrdering Ord) const {
3913   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
3914     return Builder.CreateFence(Ord);
3915   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
3916     return Builder.CreateFence(AtomicOrdering::Release);
3917   return nullptr;
3918 }
3919 
3920 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
3921                                                     Instruction *Inst,
3922                                                     AtomicOrdering Ord) const {
3923   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
3924     return Builder.CreateFence(AtomicOrdering::Acquire);
3925   return nullptr;
3926 }
3927 
3928 TargetLowering::AtomicExpansionKind
3929 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3930   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
3931   // point operations can't be used in an lr/sc sequence without breaking the
3932   // forward-progress guarantee.
3933   if (AI->isFloatingPointOperation())
3934     return AtomicExpansionKind::CmpXChg;
3935 
3936   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
3937   if (Size == 8 || Size == 16)
3938     return AtomicExpansionKind::MaskedIntrinsic;
3939   return AtomicExpansionKind::None;
3940 }
3941 
3942 static Intrinsic::ID
3943 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
3944   if (XLen == 32) {
3945     switch (BinOp) {
3946     default:
3947       llvm_unreachable("Unexpected AtomicRMW BinOp");
3948     case AtomicRMWInst::Xchg:
3949       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
3950     case AtomicRMWInst::Add:
3951       return Intrinsic::riscv_masked_atomicrmw_add_i32;
3952     case AtomicRMWInst::Sub:
3953       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
3954     case AtomicRMWInst::Nand:
3955       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
3956     case AtomicRMWInst::Max:
3957       return Intrinsic::riscv_masked_atomicrmw_max_i32;
3958     case AtomicRMWInst::Min:
3959       return Intrinsic::riscv_masked_atomicrmw_min_i32;
3960     case AtomicRMWInst::UMax:
3961       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
3962     case AtomicRMWInst::UMin:
3963       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
3964     }
3965   }
3966 
3967   if (XLen == 64) {
3968     switch (BinOp) {
3969     default:
3970       llvm_unreachable("Unexpected AtomicRMW BinOp");
3971     case AtomicRMWInst::Xchg:
3972       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
3973     case AtomicRMWInst::Add:
3974       return Intrinsic::riscv_masked_atomicrmw_add_i64;
3975     case AtomicRMWInst::Sub:
3976       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
3977     case AtomicRMWInst::Nand:
3978       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
3979     case AtomicRMWInst::Max:
3980       return Intrinsic::riscv_masked_atomicrmw_max_i64;
3981     case AtomicRMWInst::Min:
3982       return Intrinsic::riscv_masked_atomicrmw_min_i64;
3983     case AtomicRMWInst::UMax:
3984       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
3985     case AtomicRMWInst::UMin:
3986       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
3987     }
3988   }
3989 
3990   llvm_unreachable("Unexpected XLen\n");
3991 }
3992 
/// Emit a call to the masked atomicrmw intrinsic used to lower part-word
/// atomicrmw operations (selected via shouldExpandAtomicRMWInIR above).
/// AlignedAddr/Incr/Mask/ShiftAmt are the values produced by the generic
/// expansion; returns the (i32) result value.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The memory ordering is passed to the intrinsic as an XLen-wide constant.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    // The i64 intrinsic variants take i64 operands; sign-extend the i32
    // values produced by the generic expansion.
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    // SextShamt = XLen - ShiftAmt - ValWidth (see comment above).
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // On RV64 the intrinsic returns i64; the caller expects an i32 value.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
4035 
4036 TargetLowering::AtomicExpansionKind
4037 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
4038     AtomicCmpXchgInst *CI) const {
4039   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4040   if (Size == 8 || Size == 16)
4041     return AtomicExpansionKind::MaskedIntrinsic;
4042   return AtomicExpansionKind::None;
4043 }
4044 
4045 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4046     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4047     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4048   unsigned XLen = Subtarget.getXLen();
4049   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
4050   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
4051   if (XLen == 64) {
4052     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4053     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4054     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4055     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
4056   }
4057   Type *Tys[] = {AlignedAddr->getType()};
4058   Function *MaskedCmpXchg =
4059       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4060   Value *Result = Builder.CreateCall(
4061       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
4062   if (XLen == 64)
4063     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4064   return Result;
4065 }
4066 
4067 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
4068                                                      EVT VT) const {
4069   VT = VT.getScalarType();
4070 
4071   if (!VT.isSimple())
4072     return false;
4073 
4074   switch (VT.getSimpleVT().SimpleTy) {
4075   case MVT::f16:
4076     return Subtarget.hasStdExtZfh();
4077   case MVT::f32:
4078     return Subtarget.hasStdExtF();
4079   case MVT::f64:
4080     return Subtarget.hasStdExtD();
4081   default:
4082     break;
4083   }
4084 
4085   return false;
4086 }
4087 
// Register holding the exception pointer on entry to a landing pad:
// X10 (a0 in the ABI alias table above).
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
4092 
// Register holding the exception type-id selector on entry to a landing pad:
// X11 (a1 in the ABI alias table above).
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
4097 
4098 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
4099   // Return false to suppress the unnecessary extensions if the LibCall
4100   // arguments or return value is f32 type for LP64 ABI.
4101   RISCVABI::ABI ABI = Subtarget.getTargetABI();
4102   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
4103     return false;
4104 
4105   return true;
4106 }
4107 
4108 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
4109                                                  SDValue C) const {
4110   // Check integral scalar types.
4111   if (VT.isScalarInteger()) {
4112     // Omit the optimization if the sub target has the M extension and the data
4113     // size exceeds XLen.
4114     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
4115       return false;
4116     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4117       // Break the MUL to a SLLI and an ADD/SUB.
4118       const APInt &Imm = ConstNode->getAPIntValue();
4119       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4120           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4121         return true;
4122       // Omit the following optimization if the sub target has the M extension
4123       // and the data size >= XLen.
4124       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
4125         return false;
4126       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
4127       // a pair of LUI/ADDI.
4128       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
4129         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
4130         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
4131             (1 - ImmS).isPowerOf2())
4132         return true;
4133       }
4134     }
4135   }
4136 
4137   return false;
4138 }
4139 
4140 #define GET_REGISTER_MATCHER
4141 #include "RISCVGenAsmMatcher.inc"
4142 
4143 Register
4144 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4145                                        const MachineFunction &MF) const {
4146   Register Reg = MatchRegisterAltName(RegName);
4147   if (Reg == RISCV::NoRegister)
4148     Reg = MatchRegisterName(RegName);
4149   if (Reg == RISCV::NoRegister)
4150     report_fatal_error(
4151         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4152   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4153   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
4154     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4155                              StringRef(RegName) + "\"."));
4156   return Reg;
4157 }
4158 
4159 namespace llvm {
4160 namespace RISCVVIntrinsicsTable {
4161 
4162 #define GET_RISCVVIntrinsicsTable_IMPL
4163 #include "RISCVGenSearchableTables.inc"
4164 
4165 } // namespace RISCVVIntrinsicsTable
4166 } // namespace llvm
4167