1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
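    // Assign each scalable vector type to the smallest register class that
    // can hold it: types with a known minimum size of up to 64 bits use a
    // single vector register (VR), 128-bit types use VRM2, 256-bit types
    // VRM4, and 512-bit types VRM8.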
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147         const TargetRegisterClass *RC;
148         if (LMul == 1)
149           RC = &RISCV::VRRegClass;
150         else if (LMul == 2)
151           RC = &RISCV::VRM2RegClass;
152         else if (LMul == 4)
153           RC = &RISCV::VRM4RegClass;
154         else if (LMul == 8)
155           RC = &RISCV::VRM8RegClass;
156         else
157           llvm_unreachable("Unexpected LMul!");
158 
159         addRegisterClass(VT, RC);
160       };
161       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162         if (useRVVForFixedLengthVectorVT(VT))
163           addRegClassForFixedVectors(VT);
164 
165       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166         if (useRVVForFixedLengthVectorVT(VT))
167           addRegClassForFixedVectors(VT);
168     }
169   }
170 
171   // Compute derived properties from the register classes.
172   computeRegisterProperties(STI.getRegisterInfo());
173 
174   setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
176   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179   // TODO: add all necessary setOperationAction calls.
180   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
181 
182   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
183   setOperationAction(ISD::BR_CC, XLenVT, Expand);
184   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
185 
186   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
187   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
188 
189   setOperationAction(ISD::VASTART, MVT::Other, Custom);
190   setOperationAction(ISD::VAARG, MVT::Other, Expand);
191   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
192   setOperationAction(ISD::VAEND, MVT::Other, Expand);
193 
194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
195   if (!Subtarget.hasStdExtZbb()) {
196     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
197     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
198   }
199 
200   if (Subtarget.is64Bit()) {
201     setOperationAction(ISD::ADD, MVT::i32, Custom);
202     setOperationAction(ISD::SUB, MVT::i32, Custom);
203     setOperationAction(ISD::SHL, MVT::i32, Custom);
204     setOperationAction(ISD::SRA, MVT::i32, Custom);
205     setOperationAction(ISD::SRL, MVT::i32, Custom);
206   }
207 
208   if (!Subtarget.hasStdExtM()) {
209     setOperationAction(ISD::MUL, XLenVT, Expand);
210     setOperationAction(ISD::MULHS, XLenVT, Expand);
211     setOperationAction(ISD::MULHU, XLenVT, Expand);
212     setOperationAction(ISD::SDIV, XLenVT, Expand);
213     setOperationAction(ISD::UDIV, XLenVT, Expand);
214     setOperationAction(ISD::SREM, XLenVT, Expand);
215     setOperationAction(ISD::UREM, XLenVT, Expand);
216   }
217 
218   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
219     setOperationAction(ISD::MUL, MVT::i32, Custom);
220 
221     setOperationAction(ISD::SDIV, MVT::i8, Custom);
222     setOperationAction(ISD::UDIV, MVT::i8, Custom);
223     setOperationAction(ISD::UREM, MVT::i8, Custom);
224     setOperationAction(ISD::SDIV, MVT::i16, Custom);
225     setOperationAction(ISD::UDIV, MVT::i16, Custom);
226     setOperationAction(ISD::UREM, MVT::i16, Custom);
227     setOperationAction(ISD::SDIV, MVT::i32, Custom);
228     setOperationAction(ISD::UDIV, MVT::i32, Custom);
229     setOperationAction(ISD::UREM, MVT::i32, Custom);
230   }
231 
232   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
234   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
235   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
236 
237   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
239   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
240 
241   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
242     if (Subtarget.is64Bit()) {
243       setOperationAction(ISD::ROTL, MVT::i32, Custom);
244       setOperationAction(ISD::ROTR, MVT::i32, Custom);
245     }
246   } else {
247     setOperationAction(ISD::ROTL, XLenVT, Expand);
248     setOperationAction(ISD::ROTR, XLenVT, Expand);
249   }
250 
251   if (Subtarget.hasStdExtZbp()) {
252     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
253     // more combining.
254     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
255     setOperationAction(ISD::BSWAP, XLenVT, Custom);
256 
257     if (Subtarget.is64Bit()) {
258       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
259       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
260     }
261   } else {
262     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
263     // pattern match it directly in isel.
264     setOperationAction(ISD::BSWAP, XLenVT,
265                        Subtarget.hasStdExtZbb() ? Legal : Expand);
266   }
267 
268   if (Subtarget.hasStdExtZbb()) {
269     setOperationAction(ISD::SMIN, XLenVT, Legal);
270     setOperationAction(ISD::SMAX, XLenVT, Legal);
271     setOperationAction(ISD::UMIN, XLenVT, Legal);
272     setOperationAction(ISD::UMAX, XLenVT, Legal);
273   } else {
274     setOperationAction(ISD::CTTZ, XLenVT, Expand);
275     setOperationAction(ISD::CTLZ, XLenVT, Expand);
276     setOperationAction(ISD::CTPOP, XLenVT, Expand);
277   }
278 
279   if (Subtarget.hasStdExtZbt()) {
280     setOperationAction(ISD::FSHL, XLenVT, Custom);
281     setOperationAction(ISD::FSHR, XLenVT, Custom);
282     setOperationAction(ISD::SELECT, XLenVT, Legal);
283 
284     if (Subtarget.is64Bit()) {
285       setOperationAction(ISD::FSHL, MVT::i32, Custom);
286       setOperationAction(ISD::FSHR, MVT::i32, Custom);
287     }
288   } else {
289     setOperationAction(ISD::SELECT, XLenVT, Custom);
290   }
291 
292   ISD::CondCode FPCCToExpand[] = {
293       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
294       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
295       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
296 
297   ISD::NodeType FPOpToExpand[] = {
298       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
299       ISD::FP_TO_FP16};
300 
301   if (Subtarget.hasStdExtZfh())
302     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
303 
304   if (Subtarget.hasStdExtZfh()) {
305     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
306     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
307     for (auto CC : FPCCToExpand)
308       setCondCodeAction(CC, MVT::f16, Expand);
309     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
310     setOperationAction(ISD::SELECT, MVT::f16, Custom);
311     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
312     for (auto Op : FPOpToExpand)
313       setOperationAction(Op, MVT::f16, Expand);
314   }
315 
316   if (Subtarget.hasStdExtF()) {
317     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
318     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
319     for (auto CC : FPCCToExpand)
320       setCondCodeAction(CC, MVT::f32, Expand);
321     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
322     setOperationAction(ISD::SELECT, MVT::f32, Custom);
323     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
324     for (auto Op : FPOpToExpand)
325       setOperationAction(Op, MVT::f32, Expand);
326     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
327     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
328   }
329 
330   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
331     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
332 
333   if (Subtarget.hasStdExtD()) {
334     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
335     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
336     for (auto CC : FPCCToExpand)
337       setCondCodeAction(CC, MVT::f64, Expand);
338     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
339     setOperationAction(ISD::SELECT, MVT::f64, Custom);
340     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
341     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
342     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
343     for (auto Op : FPOpToExpand)
344       setOperationAction(Op, MVT::f64, Expand);
345     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
346     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
347   }
348 
349   if (Subtarget.is64Bit()) {
350     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
351     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
352     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
353     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
354   }
355 
356   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
357   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
358   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
359   setOperationAction(ISD::JumpTable, XLenVT, Custom);
360 
361   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
362 
363   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
364   // Unfortunately this can't be determined just from the ISA naming string.
365   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
366                      Subtarget.is64Bit() ? Legal : Custom);
367 
368   setOperationAction(ISD::TRAP, MVT::Other, Legal);
369   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
370   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
371 
372   if (Subtarget.hasStdExtA()) {
373     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
374     setMinCmpXchgSizeInBits(32);
375   } else {
376     setMaxAtomicSizeInBitsSupported(0);
377   }
378 
379   setBooleanContents(ZeroOrOneBooleanContent);
380 
381   if (Subtarget.hasStdExtV()) {
382     setBooleanVectorContents(ZeroOrOneBooleanContent);
383 
384     setOperationAction(ISD::VSCALE, XLenVT, Custom);
385 
386     // RVV intrinsics may have illegal operands.
387     // We also need to custom legalize vmv.x.s.
388     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
389     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
390     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
391     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
392     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
393     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
394 
395     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
396 
397     if (Subtarget.is64Bit()) {
398       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
399       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
400     } else {
401       // We must custom-lower certain vXi64 operations on RV32 due to the vector
402       // element type being illegal.
403       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
404       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
405       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
406 
407       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
408       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
409       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
410       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
411       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
412       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
413       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
414       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
415     }
416 
417     for (MVT VT : BoolVecVTs) {
418       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
419 
420       // Mask VTs are custom-expanded into a series of standard nodes
421       setOperationAction(ISD::TRUNCATE, VT, Custom);
422     }
423 
424     for (MVT VT : IntVecVTs) {
425       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
426 
427       setOperationAction(ISD::SMIN, VT, Legal);
428       setOperationAction(ISD::SMAX, VT, Legal);
429       setOperationAction(ISD::UMIN, VT, Legal);
430       setOperationAction(ISD::UMAX, VT, Legal);
431 
432       setOperationAction(ISD::ROTL, VT, Expand);
433       setOperationAction(ISD::ROTR, VT, Expand);
434 
435       // Custom-lower extensions and truncations from/to mask types.
436       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
437       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
438       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
439 
440       // RVV has native int->float & float->int conversions where the
441       // element type sizes are within one power-of-two of each other. Any
442       // wider distances between type sizes have to be lowered as sequences
443       // which progressively narrow the gap in stages.
444       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
445       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
446       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
447       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
448 
449       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
450       // nodes which truncate by one power of two at a time.
451       setOperationAction(ISD::TRUNCATE, VT, Custom);
452 
453       // Custom-lower insert/extract operations to simplify patterns.
454       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
455       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
456 
457       // Custom-lower reduction operations to set up the corresponding custom
458       // nodes' operands.
459       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
460       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
461       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
462       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
463       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
464       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
465       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
466       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
467 
468       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
469       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
470     }
471 
472     // Expand various CCs to best match the RVV ISA, which natively supports UNE
473     // but no other unordered comparisons, and supports all ordered comparisons
474     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
475     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
476     // and we pattern-match those back to the "original", swapping operands once
477     // more. This way we catch both operations and both "vf" and "fv" forms with
478     // fewer patterns.
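    // For example, (setcc x, y, SETOGT) is expanded to (setcc y, x, SETOLT),
    // which isel then matches back to a native greater-than comparison with
    // the original operand order.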
479     ISD::CondCode VFPCCToExpand[] = {
480         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
481         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
482         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
483     };
484 
485     // Sets common operation actions on RVV floating-point vector types.
486     const auto SetCommonVFPActions = [&](MVT VT) {
487       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
488       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
489       // sizes are within one power-of-two of each other. Therefore conversions
490       // between vXf16 and vXf64 must be lowered as sequences which convert via
491       // vXf32.
492       setOperationAction(ISD::FP_ROUND, VT, Custom);
493       setOperationAction(ISD::FP_EXTEND, VT, Custom);
494       // Custom-lower insert/extract operations to simplify patterns.
495       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
496       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
497       // Expand various condition codes (explained above).
498       for (auto CC : VFPCCToExpand)
499         setCondCodeAction(CC, VT, Expand);
500 
501       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
502       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
503       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
504 
505       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
506       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
507     };
508 
509     if (Subtarget.hasStdExtZfh())
510       for (MVT VT : F16VecVTs)
511         SetCommonVFPActions(VT);
512 
513     if (Subtarget.hasStdExtF())
514       for (MVT VT : F32VecVTs)
515         SetCommonVFPActions(VT);
516 
517     if (Subtarget.hasStdExtD())
518       for (MVT VT : F64VecVTs)
519         SetCommonVFPActions(VT);
520 
521     if (Subtarget.useRVVForFixedLengthVectors()) {
522       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
523         if (!useRVVForFixedLengthVectorVT(VT))
524           continue;
525 
526         // By default everything must be expanded.
527         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
528           setOperationAction(Op, VT, Expand);
529 
530         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
531         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
532 
533         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
534 
535         setOperationAction(ISD::LOAD, VT, Custom);
536         setOperationAction(ISD::STORE, VT, Custom);
537 
        // Operations below are handled differently for masks and other vectors.
539         if (VT.getVectorElementType() == MVT::i1) {
540           setOperationAction(ISD::AND, VT, Custom);
541           setOperationAction(ISD::OR, VT, Custom);
542           setOperationAction(ISD::XOR, VT, Custom);
543           setOperationAction(ISD::SETCC, VT, Custom);
544           continue;
545         }
546 
547         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
548 
549         setOperationAction(ISD::ADD, VT, Custom);
550         setOperationAction(ISD::MUL, VT, Custom);
551         setOperationAction(ISD::SUB, VT, Custom);
552         setOperationAction(ISD::AND, VT, Custom);
553         setOperationAction(ISD::OR, VT, Custom);
554         setOperationAction(ISD::XOR, VT, Custom);
555         setOperationAction(ISD::SDIV, VT, Custom);
556         setOperationAction(ISD::SREM, VT, Custom);
557         setOperationAction(ISD::UDIV, VT, Custom);
558         setOperationAction(ISD::UREM, VT, Custom);
559         setOperationAction(ISD::SHL, VT, Custom);
560         setOperationAction(ISD::SRA, VT, Custom);
561         setOperationAction(ISD::SRL, VT, Custom);
562 
563         setOperationAction(ISD::SMIN, VT, Custom);
564         setOperationAction(ISD::SMAX, VT, Custom);
565         setOperationAction(ISD::UMIN, VT, Custom);
566         setOperationAction(ISD::UMAX, VT, Custom);
567 
568         setOperationAction(ISD::MULHS, VT, Custom);
569         setOperationAction(ISD::MULHU, VT, Custom);
570 
571         setOperationAction(ISD::VSELECT, VT, Custom);
572 
573         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
574         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
575         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
576 
577         setOperationAction(ISD::BITCAST, VT, Custom);
578       }
579 
580       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
581         if (!useRVVForFixedLengthVectorVT(VT))
582           continue;
583 
584         // By default everything must be expanded.
585         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
586           setOperationAction(Op, VT, Expand);
587 
588         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
589         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
590 
591         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
592         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
593 
594         setOperationAction(ISD::LOAD, VT, Custom);
595         setOperationAction(ISD::STORE, VT, Custom);
596         setOperationAction(ISD::FADD, VT, Custom);
597         setOperationAction(ISD::FSUB, VT, Custom);
598         setOperationAction(ISD::FMUL, VT, Custom);
599         setOperationAction(ISD::FDIV, VT, Custom);
600         setOperationAction(ISD::FNEG, VT, Custom);
601         setOperationAction(ISD::FABS, VT, Custom);
602         setOperationAction(ISD::FSQRT, VT, Custom);
603         setOperationAction(ISD::FMA, VT, Custom);
604 
605         for (auto CC : VFPCCToExpand)
606           setCondCodeAction(CC, VT, Expand);
607 
608         setOperationAction(ISD::VSELECT, VT, Custom);
609 
610         setOperationAction(ISD::BITCAST, VT, Custom);
611       }
612     }
613   }
614 
615   // Function alignments.
616   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
617   setMinFunctionAlignment(FunctionAlignment);
618   setPrefFunctionAlignment(FunctionAlignment);
619 
620   setMinimumJumpTableEntries(5);
621 
622   // Jumps are expensive, compared to logic
623   setJumpIsExpensive();
624 
625   // We can use any register for comparisons
626   setHasMultipleConditionRegisters();
627 
628   setTargetDAGCombine(ISD::SETCC);
629   if (Subtarget.hasStdExtZbp()) {
630     setTargetDAGCombine(ISD::OR);
631   }
632   if (Subtarget.hasStdExtV())
633     setTargetDAGCombine(ISD::FCOPYSIGN);
634 }
635 
636 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
637                                             LLVMContext &Context,
638                                             EVT VT) const {
639   if (!VT.isVector())
640     return getPointerTy(DL);
641   if (Subtarget.hasStdExtV() &&
642       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
643     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
644   return VT.changeVectorElementTypeToInteger();
645 }
646 
647 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
648                                              const CallInst &I,
649                                              MachineFunction &MF,
650                                              unsigned Intrinsic) const {
651   switch (Intrinsic) {
652   default:
653     return false;
654   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
655   case Intrinsic::riscv_masked_atomicrmw_add_i32:
656   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
657   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
658   case Intrinsic::riscv_masked_atomicrmw_max_i32:
659   case Intrinsic::riscv_masked_atomicrmw_min_i32:
660   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
661   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
662   case Intrinsic::riscv_masked_cmpxchg_i32:
663     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
664     Info.opc = ISD::INTRINSIC_W_CHAIN;
665     Info.memVT = MVT::getVT(PtrTy->getElementType());
666     Info.ptrVal = I.getArgOperand(0);
667     Info.offset = 0;
668     Info.align = Align(4);
669     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
670                  MachineMemOperand::MOVolatile;
671     return true;
672   }
673 }
674 
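// RISC-V loads and stores support only a base register plus a signed 12-bit
// immediate offset (e.g. "lw a0, -4(a1)"); there is no scaled-index or
// reg+reg addressing, which is what the checks below enforce.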
675 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
676                                                 const AddrMode &AM, Type *Ty,
677                                                 unsigned AS,
678                                                 Instruction *I) const {
679   // No global is ever allowed as a base.
680   if (AM.BaseGV)
681     return false;
682 
683   // Require a 12-bit signed offset.
684   if (!isInt<12>(AM.BaseOffs))
685     return false;
686 
687   switch (AM.Scale) {
688   case 0: // "r+i" or just "i", depending on HasBaseReg.
689     break;
690   case 1:
691     if (!AM.HasBaseReg) // allow "r+i".
692       break;
693     return false; // disallow "r+r" or "r+r+i".
694   default:
695     return false;
696   }
697 
698   return true;
699 }
700 
701 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
702   return isInt<12>(Imm);
703 }
704 
705 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
706   return isInt<12>(Imm);
707 }
708 
709 // On RV32, 64-bit integers are split into their high and low parts and held
710 // in two different registers, so the trunc is free since the low register can
711 // just be used.
712 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
713   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
714     return false;
715   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
716   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
717   return (SrcBits == 64 && DestBits == 32);
718 }
719 
720 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
721   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
722       !SrcVT.isInteger() || !DstVT.isInteger())
723     return false;
724   unsigned SrcBits = SrcVT.getSizeInBits();
725   unsigned DestBits = DstVT.getSizeInBits();
726   return (SrcBits == 64 && DestBits == 32);
727 }
728 
729 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
730   // Zexts are free if they can be combined with a load.
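  // For example, an i8 or i16 zero-extending load is a single LBU/LHU, and on
  // RV64 an i32 zero-extending load is a single LWU.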
731   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
732     EVT MemVT = LD->getMemoryVT();
733     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
734          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
735         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
736          LD->getExtensionType() == ISD::ZEXTLOAD))
737       return true;
738   }
739 
740   return TargetLowering::isZExtFree(Val, VT2);
741 }
742 
743 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
744   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
745 }
746 
747 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
748   return Subtarget.hasStdExtZbb();
749 }
750 
751 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
752   return Subtarget.hasStdExtZbb();
753 }
754 
755 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
756                                        bool ForCodeSize) const {
757   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
758     return false;
759   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
760     return false;
761   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
762     return false;
763   if (Imm.isNegZero())
764     return false;
765   return Imm.isZero();
766 }
767 
768 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
769   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
770          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
771          (VT == MVT::f64 && Subtarget.hasStdExtD());
772 }
773 
774 // Changes the condition code and swaps operands if necessary, so the SetCC
775 // operation matches one of the comparisons supported directly in the RISC-V
776 // ISA.
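// For example, (setcc a, b, SETGT) becomes (setcc b, a, SETLT), which maps
// directly onto the BLT instruction.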
777 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
778   switch (CC) {
779   default:
780     break;
781   case ISD::SETGT:
782   case ISD::SETLE:
783   case ISD::SETUGT:
784   case ISD::SETULE:
785     CC = ISD::getSetCCSwappedOperands(CC);
786     std::swap(LHS, RHS);
787     break;
788   }
789 }
790 
791 // Return the RISC-V branch opcode that matches the given DAG integer
792 // condition code. The CondCode must be one of those supported by the RISC-V
793 // ISA (see normaliseSetCC).
794 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
795   switch (CC) {
796   default:
797     llvm_unreachable("Unsupported CondCode");
798   case ISD::SETEQ:
799     return RISCV::BEQ;
800   case ISD::SETNE:
801     return RISCV::BNE;
802   case ISD::SETLT:
803     return RISCV::BLT;
804   case ISD::SETGE:
805     return RISCV::BGE;
806   case ISD::SETULT:
807     return RISCV::BLTU;
808   case ISD::SETUGE:
809     return RISCV::BGEU;
810   }
811 }
812 
813 RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
814   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
815   if (VT.getVectorElementType() == MVT::i1)
816     KnownSize *= 8;
817 
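  // For example, nxv4i32 has a known minimum size of 128 bits and therefore
  // maps to LMUL_2, while nxv1i32 (32 bits) maps to the fractional LMUL_F2.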
818   switch (KnownSize) {
819   default:
820     llvm_unreachable("Invalid LMUL.");
821   case 8:
822     return RISCVVLMUL::LMUL_F8;
823   case 16:
824     return RISCVVLMUL::LMUL_F4;
825   case 32:
826     return RISCVVLMUL::LMUL_F2;
827   case 64:
828     return RISCVVLMUL::LMUL_1;
829   case 128:
830     return RISCVVLMUL::LMUL_2;
831   case 256:
832     return RISCVVLMUL::LMUL_4;
833   case 512:
834     return RISCVVLMUL::LMUL_8;
835   }
836 }
837 
838 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
839   switch (LMul) {
840   default:
841     llvm_unreachable("Invalid LMUL.");
842   case RISCVVLMUL::LMUL_F8:
843   case RISCVVLMUL::LMUL_F4:
844   case RISCVVLMUL::LMUL_F2:
845   case RISCVVLMUL::LMUL_1:
846     return RISCV::VRRegClassID;
847   case RISCVVLMUL::LMUL_2:
848     return RISCV::VRM2RegClassID;
849   case RISCVVLMUL::LMUL_4:
850     return RISCV::VRM4RegClassID;
851   case RISCVVLMUL::LMUL_8:
852     return RISCV::VRM8RegClassID;
853   }
854 }
855 
856 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
857   RISCVVLMUL LMUL = getLMUL(VT);
858   if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
859       LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
860     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
861                   "Unexpected subreg numbering");
862     return RISCV::sub_vrm1_0 + Index;
863   }
864   if (LMUL == RISCVVLMUL::LMUL_2) {
865     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
866                   "Unexpected subreg numbering");
867     return RISCV::sub_vrm2_0 + Index;
868   }
869   if (LMUL == RISCVVLMUL::LMUL_4) {
870     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
871                   "Unexpected subreg numbering");
872     return RISCV::sub_vrm4_0 + Index;
873   }
874   llvm_unreachable("Invalid vector type.");
875 }
876 
877 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
878   if (VT.getVectorElementType() == MVT::i1)
879     return RISCV::VRRegClassID;
880   return getRegClassIDForLMUL(getLMUL(VT));
881 }
882 
883 // Attempt to decompose a subvector insert/extract between VecVT and
884 // SubVecVT via subregister indices. Returns the subregister index that
885 // can perform the subvector insert/extract with the given element index, as
886 // well as the index corresponding to any leftover subvectors that must be
887 // further inserted/extracted within the register class for SubVecVT.
888 std::pair<unsigned, unsigned>
889 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
890     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
891     const RISCVRegisterInfo *TRI) {
892   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
893                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
894                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
895                 "Register classes not ordered");
896   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
897   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
898   // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
901   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
902   // Note that this is not guaranteed to find a subregister index, such as
903   // when we are extracting from one VR type to another.
904   unsigned SubRegIdx = RISCV::NoSubRegister;
905   for (const unsigned RCID :
906        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
907     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
908       VecVT = VecVT.getHalfNumVectorElementsVT();
909       bool IsHi =
910           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
911       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
912                                             getSubregIndexByMVT(VecVT, IsHi));
913       if (IsHi)
914         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
915     }
916   return {SubRegIdx, InsertExtractIdx};
917 }
918 
919 // Return the largest legal scalable vector type that matches VT's element type.
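// For example, with RISCV::RVVBitsPerBlock = 64, v4i16 at LMUL 1 maps to the
// container type nxv4i16, and v16i32 at LMUL 4 maps to nxv8i32.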
920 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
921                                             const RISCVSubtarget &Subtarget) {
922   assert(VT.isFixedLengthVector() &&
923          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
924          "Expected legal fixed length vector!");
925 
926   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
927   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
928 
929   MVT EltVT = VT.getVectorElementType();
930   switch (EltVT.SimpleTy) {
931   default:
932     llvm_unreachable("unexpected element type for RVV container");
933   case MVT::i1: {
934     // Masks are calculated assuming 8-bit elements since that's when we need
935     // the most elements.
936     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
937     return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
938   }
939   case MVT::i8:
940   case MVT::i16:
941   case MVT::i32:
942   case MVT::i64:
943   case MVT::f16:
944   case MVT::f32:
945   case MVT::f64: {
946     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
947     return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
948   }
949   }
950 }
951 
952 // Grow V to consume an entire RVV register.
953 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
954                                        const RISCVSubtarget &Subtarget) {
955   assert(VT.isScalableVector() &&
956          "Expected to convert into a scalable vector!");
957   assert(V.getValueType().isFixedLengthVector() &&
958          "Expected a fixed length vector operand!");
959   SDLoc DL(V);
960   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
961   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
962 }
963 
964 // Shrink V so it's just big enough to maintain a VT's worth of data.
965 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
966                                          const RISCVSubtarget &Subtarget) {
967   assert(VT.isFixedLengthVector() &&
968          "Expected to convert into a fixed length vector!");
969   assert(V.getValueType().isScalableVector() &&
970          "Expected a scalable vector operand!");
971   SDLoc DL(V);
972   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
973   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
974 }
975 
976 // Gets the two common "VL" operands: an all-ones mask and the vector length.
977 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
978 // the vector type that it is contained in.
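// For a fixed-length VecVT the VL operand is the exact element count (e.g. 4
// for v4i32); for a scalable VecVT it is X0, which requests VLMAX.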
979 static std::pair<SDValue, SDValue>
980 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
981                 const RISCVSubtarget &Subtarget) {
982   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
983   MVT XLenVT = Subtarget.getXLenVT();
984   SDValue VL = VecVT.isFixedLengthVector()
985                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
986                    : DAG.getRegister(RISCV::X0, XLenVT);
987   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
988   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
989   return {Mask, VL};
990 }
991 
992 // As above but assuming the given type is a scalable vector type.
993 static std::pair<SDValue, SDValue>
994 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
995                         const RISCVSubtarget &Subtarget) {
996   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
997   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
998 }
999 
1000 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1001                                  const RISCVSubtarget &Subtarget) {
1002   MVT VT = Op.getSimpleValueType();
1003   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1004 
1005   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1006 
1007   SDLoc DL(Op);
1008   SDValue Mask, VL;
1009   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1010 
1011   if (VT.getVectorElementType() == MVT::i1) {
1012     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1013       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1014       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1015     }
1016 
1017     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1018       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1019       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1020     }
1021 
1022     return SDValue();
1023   }
1024 
1025   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1026     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1027                                         : RISCVISD::VMV_V_X_VL;
1028     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1029     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1030   }
1031 
1032   // Try and match an index sequence, which we can lower directly to the vid
1033   // instruction. An all-undef vector is matched by getSplatValue, above.
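  // For example, the constant vector <0, 1, 2, 3> (or <0, undef, 2, 3>) is
  // matched here and lowered to a single vid.v.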
1034   if (VT.isInteger()) {
1035     bool IsVID = true;
1036     for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
1037       IsVID &= Op.getOperand(i).isUndef() ||
1038                (isa<ConstantSDNode>(Op.getOperand(i)) &&
1039                 Op.getConstantOperandVal(i) == i);
1040 
1041     if (IsVID) {
1042       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1043       return convertFromScalableVector(VT, VID, DAG, Subtarget);
1044     }
1045   }
1046 
1047   return SDValue();
1048 }
1049 
1050 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
1051                                    const RISCVSubtarget &Subtarget) {
1052   SDValue V1 = Op.getOperand(0);
1053   SDLoc DL(Op);
1054   MVT VT = Op.getSimpleValueType();
1055   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1056 
1057   if (SVN->isSplat()) {
1058     int Lane = SVN->getSplatIndex();
1059     if (Lane >= 0) {
1060       MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1061 
1062       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
1063       assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");
1064 
1065       SDValue Mask, VL;
1066       std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1067       MVT XLenVT = Subtarget.getXLenVT();
1068       SDValue Gather =
1069           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
1070                       DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
1071       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1072     }
1073   }
1074 
1075   return SDValue();
1076 }
1077 
1078 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1079                                             SelectionDAG &DAG) const {
1080   switch (Op.getOpcode()) {
1081   default:
1082     report_fatal_error("unimplemented operand");
1083   case ISD::GlobalAddress:
1084     return lowerGlobalAddress(Op, DAG);
1085   case ISD::BlockAddress:
1086     return lowerBlockAddress(Op, DAG);
1087   case ISD::ConstantPool:
1088     return lowerConstantPool(Op, DAG);
1089   case ISD::JumpTable:
1090     return lowerJumpTable(Op, DAG);
1091   case ISD::GlobalTLSAddress:
1092     return lowerGlobalTLSAddress(Op, DAG);
1093   case ISD::SELECT:
1094     return lowerSELECT(Op, DAG);
1095   case ISD::VASTART:
1096     return lowerVASTART(Op, DAG);
1097   case ISD::FRAMEADDR:
1098     return lowerFRAMEADDR(Op, DAG);
1099   case ISD::RETURNADDR:
1100     return lowerRETURNADDR(Op, DAG);
1101   case ISD::SHL_PARTS:
1102     return lowerShiftLeftParts(Op, DAG);
1103   case ISD::SRA_PARTS:
1104     return lowerShiftRightParts(Op, DAG, true);
1105   case ISD::SRL_PARTS:
1106     return lowerShiftRightParts(Op, DAG, false);
1107   case ISD::BITCAST: {
1108     SDValue Op0 = Op.getOperand(0);
1109     // We can handle fixed length vector bitcasts with a simple replacement
1110     // in isel.
1111     if (Op.getValueType().isFixedLengthVector()) {
1112       if (Op0.getValueType().isFixedLengthVector())
1113         return Op;
1114       return SDValue();
1115     }
1116     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
1117             Subtarget.hasStdExtZfh()) &&
1118            "Unexpected custom legalisation");
1119     SDLoc DL(Op);
1120     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
1121       if (Op0.getValueType() != MVT::i16)
1122         return SDValue();
1123       SDValue NewOp0 =
1124           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
1125       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1126       return FPConv;
1127     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
1128                Subtarget.hasStdExtF()) {
1129       if (Op0.getValueType() != MVT::i32)
1130         return SDValue();
1131       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1132       SDValue FPConv =
1133           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1134       return FPConv;
1135     }
1136     return SDValue();
1137   }
1138   case ISD::INTRINSIC_WO_CHAIN:
1139     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1140   case ISD::INTRINSIC_W_CHAIN:
1141     return LowerINTRINSIC_W_CHAIN(Op, DAG);
1142   case ISD::BSWAP:
1143   case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
1145     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1146     MVT VT = Op.getSimpleValueType();
1147     SDLoc DL(Op);
1148     // Start with the maximum immediate value which is the bitwidth - 1.
1149     unsigned Imm = VT.getSizeInBits() - 1;
1150     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1151     if (Op.getOpcode() == ISD::BSWAP)
1152       Imm &= ~0x7U;
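    // For example, on RV64 BITREVERSE uses Imm = 63 (0b111111) and BSWAP uses
    // Imm = 56 (0b111000), i.e. a byte swap that doesn't reverse the bits
    // within each byte.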
1153     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
1154                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
1155   }
1156   case ISD::FSHL:
1157   case ISD::FSHR: {
1158     MVT VT = Op.getSimpleValueType();
1159     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1160     SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
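    // For example, on RV64 the shift amount is masked with 63 before forming
    // the FSL/FSR node.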
1163     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1164     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1165                                 DAG.getConstant(ShAmtWidth, DL, VT));
1166     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1167     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1168   }
1169   case ISD::TRUNCATE: {
1170     SDLoc DL(Op);
1171     EVT VT = Op.getValueType();
1172     // Only custom-lower vector truncates
1173     if (!VT.isVector())
1174       return Op;
1175 
1176     // Truncates to mask types are handled differently
1177     if (VT.getVectorElementType() == MVT::i1)
1178       return lowerVectorMaskTrunc(Op, DAG);
1179 
1180     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1181     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
1182     // truncate by one power of two at a time.
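    // For example, a truncate from nxv2i32 to nxv2i8 is lowered as
    // nxv2i32 -> nxv2i16 -> nxv2i8.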
1183     EVT DstEltVT = VT.getVectorElementType();
1184 
1185     SDValue Src = Op.getOperand(0);
1186     EVT SrcVT = Src.getValueType();
1187     EVT SrcEltVT = SrcVT.getVectorElementType();
1188 
1189     assert(DstEltVT.bitsLT(SrcEltVT) &&
1190            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1191            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1192            "Unexpected vector truncate lowering");
1193 
1194     SDValue Result = Src;
1195     LLVMContext &Context = *DAG.getContext();
1196     const ElementCount Count = SrcVT.getVectorElementCount();
1197     do {
1198       SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
1199       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1200       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
1201     } while (SrcEltVT != DstEltVT);
1202 
1203     return Result;
1204   }
1205   case ISD::ANY_EXTEND:
1206   case ISD::ZERO_EXTEND:
1207     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1208   case ISD::SIGN_EXTEND:
1209     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1210   case ISD::SPLAT_VECTOR:
1211     return lowerSPLATVECTOR(Op, DAG);
1212   case ISD::INSERT_VECTOR_ELT:
1213     return lowerINSERT_VECTOR_ELT(Op, DAG);
1214   case ISD::EXTRACT_VECTOR_ELT:
1215     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1216   case ISD::VSCALE: {
1217     MVT VT = Op.getSimpleValueType();
1218     SDLoc DL(Op);
1219     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1220     // We define our scalable vector types for lmul=1 to use a 64 bit known
1221     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
1222     // vscale as VLENB / 8.
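    // For example, with VLEN = 128 we have VLENB = 16 and vscale = 2, so
    // <vscale x 2 x i32> holds four i32 elements.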
1223     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1224                                  DAG.getConstant(3, DL, VT));
1225     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1226   }
1227   case ISD::FP_EXTEND: {
1228     // RVV can only do fp_extend to types double the size as the source. We
1229     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1230     // via f32.
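    // For example, nxv2f16 -> nxv2f64 becomes nxv2f16 -> nxv2f32 -> nxv2f64.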
1231     MVT VT = Op.getSimpleValueType();
1232     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap for vXf16 -> vXf64 extensions.
1234     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1235         SrcVT.getVectorElementType() != MVT::f16)
1236       return Op;
1237     SDLoc DL(Op);
1238     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1239     SDValue IntermediateRound =
1240         DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
1241     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
1242   }
1243   case ISD::FP_ROUND: {
1244     // RVV can only do fp_round to types half the size as the source. We
1245     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1246     // conversion instruction.
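    // For example, nxv2f64 -> nxv2f16 first narrows to nxv2f32 with
    // round-to-odd and then rounds to nxv2f16.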
1247     MVT VT = Op.getSimpleValueType();
1248     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap for vXf64 -> vXf16 rounds.
1250     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1251         SrcVT.getVectorElementType() != MVT::f64)
1252       return Op;
1253     SDLoc DL(Op);
1254     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1255     SDValue IntermediateRound =
1256         DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
1257     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
1258   }
1259   case ISD::FP_TO_SINT:
1260   case ISD::FP_TO_UINT:
1261   case ISD::SINT_TO_FP:
1262   case ISD::UINT_TO_FP: {
1263     // RVV can only do fp<->int conversions to types half/double the size as
1264     // the source. We custom-lower any conversions that do two hops into
1265     // sequences.
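    // For example, nxv2i8 -> nxv2f32 is lowered as an i8->i32 extend followed
    // by an i32->f32 convert, and nxv2f32 -> nxv2i8 as an f32->i16 convert
    // followed by an i16->i8 truncate.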
1266     MVT VT = Op.getSimpleValueType();
1267     if (!VT.isVector())
1268       return Op;
1269     SDLoc DL(Op);
1270     SDValue Src = Op.getOperand(0);
1271     MVT EltVT = VT.getVectorElementType();
1272     MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
1273     unsigned EltSize = EltVT.getSizeInBits();
1274     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1275     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1276            "Unexpected vector element types");
1277     bool IsInt2FP = SrcEltVT.isInteger();
1278     // Widening conversions
1279     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1280       if (IsInt2FP) {
1281         // Do a regular integer sign/zero extension then convert to float.
1282         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1283                                       VT.getVectorElementCount());
1284         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1285                                  ? ISD::ZERO_EXTEND
1286                                  : ISD::SIGN_EXTEND;
1287         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1288         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1289       }
1290       // FP2Int
1291       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1292       // Do one doubling fp_extend then complete the operation by converting
1293       // to int.
1294       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1295       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1296       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1297     }
1298 
1299     // Narrowing conversions
1300     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1301       if (IsInt2FP) {
1302         // One narrowing int_to_fp, then an fp_round.
1303         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1304         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1305         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1306         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1307       }
1308       // FP2Int
1309       // One narrowing fp_to_int, then truncate the integer. If the float isn't
1310       // representable by the integer, the result is poison.
1311       MVT IVecVT =
1312           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1313                            VT.getVectorElementCount());
1314       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1315       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1316     }
1317 
1318     return Op;
1319   }
1320   case ISD::VECREDUCE_ADD:
1321   case ISD::VECREDUCE_UMAX:
1322   case ISD::VECREDUCE_SMAX:
1323   case ISD::VECREDUCE_UMIN:
1324   case ISD::VECREDUCE_SMIN:
1325   case ISD::VECREDUCE_AND:
1326   case ISD::VECREDUCE_OR:
1327   case ISD::VECREDUCE_XOR:
1328     return lowerVECREDUCE(Op, DAG);
1329   case ISD::VECREDUCE_FADD:
1330   case ISD::VECREDUCE_SEQ_FADD:
1331     return lowerFPVECREDUCE(Op, DAG);
1332   case ISD::INSERT_SUBVECTOR:
1333     return lowerINSERT_SUBVECTOR(Op, DAG);
1334   case ISD::EXTRACT_SUBVECTOR:
1335     return lowerEXTRACT_SUBVECTOR(Op, DAG);
1336   case ISD::BUILD_VECTOR:
1337     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1338   case ISD::VECTOR_SHUFFLE:
1339     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
1340   case ISD::LOAD:
1341     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1342   case ISD::STORE:
1343     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1344   case ISD::SETCC:
1345     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
1346   case ISD::ADD:
1347     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1348   case ISD::SUB:
1349     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1350   case ISD::MUL:
1351     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1352   case ISD::MULHS:
1353     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
1354   case ISD::MULHU:
1355     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
1356   case ISD::AND:
1357     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
1358                                               RISCVISD::AND_VL);
1359   case ISD::OR:
1360     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
1361                                               RISCVISD::OR_VL);
1362   case ISD::XOR:
1363     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
1364                                               RISCVISD::XOR_VL);
1365   case ISD::SDIV:
1366     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1367   case ISD::SREM:
1368     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1369   case ISD::UDIV:
1370     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1371   case ISD::UREM:
1372     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1373   case ISD::SHL:
1374     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1375   case ISD::SRA:
1376     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1377   case ISD::SRL:
1378     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1379   case ISD::FADD:
1380     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1381   case ISD::FSUB:
1382     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1383   case ISD::FMUL:
1384     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1385   case ISD::FDIV:
1386     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1387   case ISD::FNEG:
1388     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1389   case ISD::FABS:
1390     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
1391   case ISD::FSQRT:
1392     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
1393   case ISD::FMA:
1394     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1395   case ISD::SMIN:
1396     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
1397   case ISD::SMAX:
1398     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
1399   case ISD::UMIN:
1400     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
1401   case ISD::UMAX:
1402     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
1403   case ISD::VSELECT:
1404     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
1405   }
1406 }
1407 
1408 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1409                              SelectionDAG &DAG, unsigned Flags) {
1410   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1411 }
1412 
1413 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1414                              SelectionDAG &DAG, unsigned Flags) {
1415   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1416                                    Flags);
1417 }
1418 
1419 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1420                              SelectionDAG &DAG, unsigned Flags) {
1421   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1422                                    N->getOffset(), Flags);
1423 }
1424 
1425 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1426                              SelectionDAG &DAG, unsigned Flags) {
1427   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1428 }
1429 
1430 template <class NodeTy>
1431 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1432                                      bool IsLocal) const {
1433   SDLoc DL(N);
1434   EVT Ty = getPointerTy(DAG.getDataLayout());
1435 
1436   if (isPositionIndependent()) {
1437     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1438     if (IsLocal)
1439       // Use PC-relative addressing to access the symbol. This generates the
1440       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
1441       // %pcrel_lo(auipc)).
1442       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1443 
1444     // Use PC-relative addressing to access the GOT for this symbol, then load
1445     // the address from the GOT. This generates the pattern (PseudoLA sym),
1446     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
1447     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
1448   }
1449 
1450   switch (getTargetMachine().getCodeModel()) {
1451   default:
1452     report_fatal_error("Unsupported code model for lowering");
1453   case CodeModel::Small: {
1454     // Generate a sequence for accessing addresses within the first 2 GiB of
1455     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
1456     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
1457     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
1458     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1459     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
1460   }
1461   case CodeModel::Medium: {
1462     // Generate a sequence for accessing addresses within any 2GiB range within
1463     // the address space. This generates the pattern (PseudoLLA sym), which
1464     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
1465     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1466     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1467   }
1468   }
1469 }
1470 
1471 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1472                                                 SelectionDAG &DAG) const {
1473   SDLoc DL(Op);
1474   EVT Ty = Op.getValueType();
1475   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1476   int64_t Offset = N->getOffset();
1477   MVT XLenVT = Subtarget.getXLenVT();
1478 
1479   const GlobalValue *GV = N->getGlobal();
1480   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1481   SDValue Addr = getAddr(N, DAG, IsLocal);
1482 
1483   // In order to maximise the opportunity for common subexpression elimination,
1484   // emit a separate ADD node for the global address offset instead of folding
1485   // it in the global address node. Later peephole optimisations may choose to
1486   // fold it back in when profitable.
1487   if (Offset != 0)
1488     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1489                        DAG.getConstant(Offset, DL, XLenVT));
1490   return Addr;
1491 }
1492 
1493 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1494                                                SelectionDAG &DAG) const {
1495   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1496 
1497   return getAddr(N, DAG);
1498 }
1499 
1500 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1501                                                SelectionDAG &DAG) const {
1502   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1503 
1504   return getAddr(N, DAG);
1505 }
1506 
1507 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1508                                             SelectionDAG &DAG) const {
1509   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1510 
1511   return getAddr(N, DAG);
1512 }
1513 
1514 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1515                                               SelectionDAG &DAG,
1516                                               bool UseGOT) const {
1517   SDLoc DL(N);
1518   EVT Ty = getPointerTy(DAG.getDataLayout());
1519   const GlobalValue *GV = N->getGlobal();
1520   MVT XLenVT = Subtarget.getXLenVT();
1521 
1522   if (UseGOT) {
1523     // Use PC-relative addressing to access the GOT for this TLS symbol, then
1524     // load the address from the GOT and add the thread pointer. This generates
1525     // the pattern (PseudoLA_TLS_IE sym), which expands to
1526     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
1527     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1528     SDValue Load =
1529         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
1530 
1531     // Add the thread pointer.
1532     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1533     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
1534   }
1535 
1536   // Generate a sequence for accessing the address relative to the thread
1537   // pointer, with the appropriate adjustment for the thread pointer offset.
1538   // This generates the pattern
1539   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
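  // i.e. the standard local-exec code sequence:
  //   lui   rd, %tprel_hi(sym)
  //   add   rd, rd, tp, %tprel_add(sym)
  //   addi  rd, rd, %tprel_lo(sym)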
1540   SDValue AddrHi =
1541       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
1542   SDValue AddrAdd =
1543       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
1544   SDValue AddrLo =
1545       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
1546 
1547   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1548   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1549   SDValue MNAdd = SDValue(
1550       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
1551       0);
1552   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
1553 }
1554 
1555 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1556                                                SelectionDAG &DAG) const {
1557   SDLoc DL(N);
1558   EVT Ty = getPointerTy(DAG.getDataLayout());
1559   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1560   const GlobalValue *GV = N->getGlobal();
1561 
1562   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1563   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
1564   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
1565   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1566   SDValue Load =
1567       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
1568 
1569   // Prepare argument list to generate call.
1570   ArgListTy Args;
1571   ArgListEntry Entry;
1572   Entry.Node = Load;
1573   Entry.Ty = CallTy;
1574   Args.push_back(Entry);
1575 
1576   // Setup call to __tls_get_addr.
1577   TargetLowering::CallLoweringInfo CLI(DAG);
1578   CLI.setDebugLoc(DL)
1579       .setChain(DAG.getEntryNode())
1580       .setLibCallee(CallingConv::C, CallTy,
1581                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1582                     std::move(Args));
1583 
1584   return LowerCallTo(CLI).first;
1585 }
1586 
1587 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1588                                                    SelectionDAG &DAG) const {
1589   SDLoc DL(Op);
1590   EVT Ty = Op.getValueType();
1591   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1592   int64_t Offset = N->getOffset();
1593   MVT XLenVT = Subtarget.getXLenVT();
1594 
1595   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1596 
1597   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1598       CallingConv::GHC)
1599     report_fatal_error("In GHC calling convention TLS is not supported");
1600 
1601   SDValue Addr;
1602   switch (Model) {
1603   case TLSModel::LocalExec:
1604     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1605     break;
1606   case TLSModel::InitialExec:
1607     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1608     break;
1609   case TLSModel::LocalDynamic:
1610   case TLSModel::GeneralDynamic:
1611     Addr = getDynamicTLSAddr(N, DAG);
1612     break;
1613   }
1614 
1615   // In order to maximise the opportunity for common subexpression elimination,
1616   // emit a separate ADD node for the global address offset instead of folding
1617   // it in the global address node. Later peephole optimisations may choose to
1618   // fold it back in when profitable.
1619   if (Offset != 0)
1620     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1621                        DAG.getConstant(Offset, DL, XLenVT));
1622   return Addr;
1623 }
1624 
1625 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1626   SDValue CondV = Op.getOperand(0);
1627   SDValue TrueV = Op.getOperand(1);
1628   SDValue FalseV = Op.getOperand(2);
1629   SDLoc DL(Op);
1630   MVT XLenVT = Subtarget.getXLenVT();
1631 
1632   // If the result type is XLenVT and CondV is the output of a SETCC node
1633   // which also operated on XLenVT inputs, then merge the SETCC node into the
1634   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1635   // compare+branch instructions. i.e.:
1636   // (select (setcc lhs, rhs, cc), truev, falsev)
1637   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1638   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1639       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1640     SDValue LHS = CondV.getOperand(0);
1641     SDValue RHS = CondV.getOperand(1);
1642     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1643     ISD::CondCode CCVal = CC->get();
1644 
1645     normaliseSetCC(LHS, RHS, CCVal);
1646 
1647     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1648     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1649     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1650   }
1651 
1652   // Otherwise:
1653   // (select condv, truev, falsev)
1654   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1655   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1656   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1657 
1658   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1659 
1660   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1661 }
1662 
1663 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1664   MachineFunction &MF = DAG.getMachineFunction();
1665   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1666 
1667   SDLoc DL(Op);
1668   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1669                                  getPointerTy(MF.getDataLayout()));
1670 
1671   // vastart just stores the address of the VarArgsFrameIndex slot into the
1672   // memory location argument.
1673   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1674   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1675                       MachinePointerInfo(SV));
1676 }
1677 
1678 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1679                                             SelectionDAG &DAG) const {
1680   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1681   MachineFunction &MF = DAG.getMachineFunction();
1682   MachineFrameInfo &MFI = MF.getFrameInfo();
1683   MFI.setFrameAddressIsTaken(true);
1684   Register FrameReg = RI.getFrameRegister(MF);
1685   int XLenInBytes = Subtarget.getXLen() / 8;
1686 
1687   EVT VT = Op.getValueType();
1688   SDLoc DL(Op);
1689   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1690   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1691   while (Depth--) {
1692     int Offset = -(XLenInBytes * 2);
1693     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1694                               DAG.getIntPtrConstant(Offset, DL));
1695     FrameAddr =
1696         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1697   }
1698   return FrameAddr;
1699 }
1700 
1701 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1702                                              SelectionDAG &DAG) const {
1703   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1704   MachineFunction &MF = DAG.getMachineFunction();
1705   MachineFrameInfo &MFI = MF.getFrameInfo();
1706   MFI.setReturnAddressIsTaken(true);
1707   MVT XLenVT = Subtarget.getXLenVT();
1708   int XLenInBytes = Subtarget.getXLen() / 8;
1709 
1710   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1711     return SDValue();
1712 
1713   EVT VT = Op.getValueType();
1714   SDLoc DL(Op);
1715   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1716   if (Depth) {
1717     int Off = -XLenInBytes;
1718     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1719     SDValue Offset = DAG.getConstant(Off, DL, VT);
1720     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1721                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1722                        MachinePointerInfo());
1723   }
1724 
1725   // Return the value of the return address register, marking it an implicit
1726   // live-in.
1727   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1728   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1729 }
1730 
1731 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1732                                                  SelectionDAG &DAG) const {
1733   SDLoc DL(Op);
1734   SDValue Lo = Op.getOperand(0);
1735   SDValue Hi = Op.getOperand(1);
1736   SDValue Shamt = Op.getOperand(2);
1737   EVT VT = Lo.getValueType();
1738 
1739   // if Shamt-XLEN < 0: // Shamt < XLEN
1740   //   Lo = Lo << Shamt
1741   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1742   // else:
1743   //   Lo = 0
1744   //   Hi = Lo << (Shamt-XLEN)
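  //
  // The (Lo >>u 1) >>u (XLEN-1 - Shamt) form is used rather than
  // Lo >>u (XLEN - Shamt) so that the shift amount stays in [0, XLEN-1] even
  // when Shamt is 0.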
1745 
1746   SDValue Zero = DAG.getConstant(0, DL, VT);
1747   SDValue One = DAG.getConstant(1, DL, VT);
1748   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1749   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1750   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1751   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1752 
1753   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1754   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1755   SDValue ShiftRightLo =
1756       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1757   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1758   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1759   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1760 
1761   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1762 
1763   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1764   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1765 
1766   SDValue Parts[2] = {Lo, Hi};
1767   return DAG.getMergeValues(Parts, DL);
1768 }
1769 
1770 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1771                                                   bool IsSRA) const {
1772   SDLoc DL(Op);
1773   SDValue Lo = Op.getOperand(0);
1774   SDValue Hi = Op.getOperand(1);
1775   SDValue Shamt = Op.getOperand(2);
1776   EVT VT = Lo.getValueType();
1777 
1778   // SRA expansion:
1779   //   if Shamt-XLEN < 0: // Shamt < XLEN
1780   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1781   //     Hi = Hi >>s Shamt
1782   //   else:
1783   //     Lo = Hi >>s (Shamt-XLEN);
1784   //     Hi = Hi >>s (XLEN-1)
1785   //
1786   // SRL expansion:
1787   //   if Shamt-XLEN < 0: // Shamt < XLEN
1788   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1789   //     Hi = Hi >>u Shamt
1790   //   else:
1791   //     Lo = Hi >>u (Shamt-XLEN);
1792   //     Hi = 0;
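  //
  // As in lowerShiftLeftParts, the (Hi << 1) << (XLEN-1 - Shamt) form keeps
  // the shift amount in [0, XLEN-1] even when Shamt is 0.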
1793 
1794   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1795 
1796   SDValue Zero = DAG.getConstant(0, DL, VT);
1797   SDValue One = DAG.getConstant(1, DL, VT);
1798   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1799   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1800   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1801   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1802 
1803   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1804   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1805   SDValue ShiftLeftHi =
1806       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1807   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1808   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1809   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1810   SDValue HiFalse =
1811       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1812 
1813   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1814 
1815   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1816   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1817 
1818   SDValue Parts[2] = {Lo, Hi};
1819   return DAG.getMergeValues(Parts, DL);
1820 }
1821 
1822 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1823 // illegal (currently only vXi64 RV32).
1824 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1825 // them to SPLAT_VECTOR_I64
1826 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
1827                                               SelectionDAG &DAG) const {
1828   SDLoc DL(Op);
1829   EVT VecVT = Op.getValueType();
1830   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
1831          "Unexpected SPLAT_VECTOR lowering");
1832   SDValue SplatVal = Op.getOperand(0);
1833 
1834   // If we can prove that the value is a sign-extended 32-bit value, lower this
1835   // as a custom node in order to try and match RVV vector/scalar instructions.
1836   if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
1837     if (isInt<32>(CVal->getSExtValue()))
1838       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1839                          DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
1840   }
1841 
1842   if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1843       SplatVal.getOperand(0).getValueType() == MVT::i32) {
1844     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1845                        SplatVal.getOperand(0));
1846   }
1847 
1848   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
1849   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
1850   // vmv.v.x vX, hi
1851   // vsll.vx vX, vX, /*32*/
1852   // vmv.v.x vY, lo
1853   // vsll.vx vY, vY, /*32*/
1854   // vsrl.vx vY, vY, /*32*/
1855   // vor.vv vX, vX, vY
1856   SDValue One = DAG.getConstant(1, DL, MVT::i32);
1857   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1858   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
1859   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
1860   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
1861 
1862   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
1863   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
1864   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
1865 
1866   if (isNullConstant(Hi))
1867     return Lo;
1868 
1869   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
1870   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
1871 
1872   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
1873 }
1874 
1875 // Custom-lower extensions from mask vectors by using a vselect either with 1
1876 // for zero/any-extension or -1 for sign-extension:
1877 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
1878 // Note that any-extension is lowered identically to zero-extension.
1879 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
1880                                                 int64_t ExtTrueVal) const {
1881   SDLoc DL(Op);
1882   MVT VecVT = Op.getSimpleValueType();
1883   SDValue Src = Op.getOperand(0);
1884   // Only custom-lower extensions from mask types
1885   if (!Src.getValueType().isVector() ||
1886       Src.getValueType().getVectorElementType() != MVT::i1)
1887     return Op;
1888 
1889   MVT XLenVT = Subtarget.getXLenVT();
1890   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
1891   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
1892 
1893   if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants: on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1898     bool IsRV32E64 =
1899         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1900 
1901     if (!IsRV32E64) {
1902       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1903       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
1904     } else {
1905       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1906       SplatTrueVal =
1907           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
1908     }
1909 
1910     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
1911   }
1912 
1913   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
1914   MVT I1ContainerVT =
1915       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1916 
1917   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
1918 
1919   SDValue Mask, VL;
1920   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
1921 
1922   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
1923   SplatTrueVal =
1924       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
1925   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
1926                                SplatTrueVal, SplatZero, VL);
1927 
1928   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
1929 }
1930 
1931 // Custom-lower truncations from vectors to mask vectors by using a mask and a
1932 // setcc operation:
1933 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
1934 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
1935                                                   SelectionDAG &DAG) const {
1936   SDLoc DL(Op);
1937   EVT MaskVT = Op.getValueType();
1938   // Only expect to custom-lower truncations to mask types
1939   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
1940          "Unexpected type for vector mask lowering");
1941   SDValue Src = Op.getOperand(0);
1942   EVT VecVT = Src.getValueType();
1943 
  // Be careful not to introduce illegal scalar types at this stage, and be
  // careful also about splatting constants: on RV32, vXi64 SPLAT_VECTOR is
  // illegal and must be expanded. Since we know that the constants are
  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1948   bool IsRV32E64 =
1949       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1950   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
1951   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1952 
1953   if (!IsRV32E64) {
1954     SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
1955     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1956   } else {
1957     SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
1958     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1959   }
1960 
1961   SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
1962 
1963   return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
1964 }
1965 
1966 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1967                                                     SelectionDAG &DAG) const {
1968   SDLoc DL(Op);
1969   MVT VecVT = Op.getSimpleValueType();
1970   SDValue Vec = Op.getOperand(0);
1971   SDValue Val = Op.getOperand(1);
1972   SDValue Idx = Op.getOperand(2);
1973 
1974   // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
1975   // first slid down into position, the value is inserted into the first
1976   // position, and the vector is slid back up. We do this to simplify patterns.
1977   //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1978   if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
1979     if (isNullConstant(Idx))
1980       return Op;
1981     SDValue Mask, VL;
1982     std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
1983     SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT,
1984                                     DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL);
1985     SDValue InsertElt0 =
1986         DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1987                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1988 
1989     return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, Vec, InsertElt0, Idx,
1990                        Mask, VL);
1991   }
1992 
1993   if (!VecVT.isScalableVector())
1994     return SDValue();
1995 
1996   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1997   // is illegal (currently only vXi64 RV32).
1998   // Since there is no easy way of getting a single element into a vector when
1999   // XLEN<SEW, we lower the operation to the following sequence:
2000   //   splat      vVal, rVal
2001   //   vid.v      vVid
2002   //   vmseq.vx   mMask, vVid, rIdx
2003   //   vmerge.vvm vDest, vSrc, vVal, mMask
2004   // This essentially merges the original vector with the inserted element by
2005   // using a mask whose only set bit is that corresponding to the insert
2006   // index.
2007   SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
2008   SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
2009 
2010   SDValue Mask, VL;
2011   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2012   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL);
2013   auto SetCCVT =
2014       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
2015   SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
2016 
2017   return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec);
2018 }
2019 
2020 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2021 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2022 // types this is done using VMV_X_S to allow us to glean information about the
2023 // sign bits of the result.
2024 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2025                                                      SelectionDAG &DAG) const {
2026   SDLoc DL(Op);
2027   SDValue Idx = Op.getOperand(1);
2028   SDValue Vec = Op.getOperand(0);
2029   EVT EltVT = Op.getValueType();
2030   MVT VecVT = Vec.getSimpleValueType();
2031   MVT XLenVT = Subtarget.getXLenVT();
2032 
2033   // If the index is 0, the vector is already in the right position.
2034   if (!isNullConstant(Idx)) {
2035     SDValue Mask, VL;
2036     std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2037     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT),
2038                       Vec, Idx, Mask, VL);
2039   }
2040 
2041   if (!EltVT.isInteger()) {
2042     // Floating-point extracts are handled in TableGen.
2043     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2044                        DAG.getConstant(0, DL, XLenVT));
2045   }
2046 
2047   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2048   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2049 }
2050 
2051 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2052                                                      SelectionDAG &DAG) const {
2053   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2054   SDLoc DL(Op);
2055 
2056   if (Subtarget.hasStdExtV()) {
2057     // Some RVV intrinsics may claim that they want an integer operand to be
2058     // extended.
2059     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2060             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
2061       if (II->ExtendedOperand) {
2062         assert(II->ExtendedOperand < Op.getNumOperands());
2063         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2064         SDValue &ScalarOp = Operands[II->ExtendedOperand];
2065         EVT OpVT = ScalarOp.getValueType();
2066         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
2067             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
2068           // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
2071           // FIXME: Should we ignore the upper bits in isel instead?
2072           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
2073                                                           : ISD::ANY_EXTEND;
2074           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
2075           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
2076                              Operands);
2077         }
2078       }
2079     }
2080   }
2081 
2082   switch (IntNo) {
2083   default:
2084     return SDValue();    // Don't custom lower most intrinsics.
2085   case Intrinsic::thread_pointer: {
2086     EVT PtrVT = getPointerTy(DAG.getDataLayout());
2087     return DAG.getRegister(RISCV::X4, PtrVT);
2088   }
2089   case Intrinsic::riscv_vmv_x_s:
2090     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
2091     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
2092                        Op.getOperand(1));
2093   case Intrinsic::riscv_vmv_v_x: {
2094     SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
2095                                  Op.getOperand(1));
2096     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
2097                        Scalar, Op.getOperand(2));
2098   }
2099   case Intrinsic::riscv_vfmv_v_f:
2100     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
2101                        Op.getOperand(1), Op.getOperand(2));
2102   }
2103 }
2104 
2105 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
2106                                                     SelectionDAG &DAG) const {
2107   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2108   SDLoc DL(Op);
2109 
2110   if (Subtarget.hasStdExtV()) {
2111     // Some RVV intrinsics may claim that they want an integer operand to be
2112     // extended.
2113     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2114             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
2115       if (II->ExtendedOperand) {
2116         // The operands start from the second argument in INTRINSIC_W_CHAIN.
2117         unsigned ExtendOp = II->ExtendedOperand + 1;
2118         assert(ExtendOp < Op.getNumOperands());
2119         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2120         SDValue &ScalarOp = Operands[ExtendOp];
2121         EVT OpVT = ScalarOp.getValueType();
2122         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
2123             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
2124           // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
2127           // FIXME: Should we ignore the upper bits in isel instead?
2128           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
2129                                                           : ISD::ANY_EXTEND;
2130           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
2131           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
2132                              Operands);
2133         }
2134       }
2135     }
2136   }
2137 
2138   return SDValue(); // Don't custom lower most intrinsics.
2139 }
2140 
2141 static std::pair<unsigned, uint64_t>
2142 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
2143   switch (ISDOpcode) {
2144   default:
2145     llvm_unreachable("Unhandled reduction");
2146   case ISD::VECREDUCE_ADD:
2147     return {RISCVISD::VECREDUCE_ADD, 0};
2148   case ISD::VECREDUCE_UMAX:
2149     return {RISCVISD::VECREDUCE_UMAX, 0};
2150   case ISD::VECREDUCE_SMAX:
2151     return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
2152   case ISD::VECREDUCE_UMIN:
2153     return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
2154   case ISD::VECREDUCE_SMIN:
2155     return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
2156   case ISD::VECREDUCE_AND:
2157     return {RISCVISD::VECREDUCE_AND, -1};
2158   case ISD::VECREDUCE_OR:
2159     return {RISCVISD::VECREDUCE_OR, 0};
2160   case ISD::VECREDUCE_XOR:
2161     return {RISCVISD::VECREDUCE_XOR, 0};
2162   }
2163 }
2164 
2165 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV
2166 // reduction opcode. Note that this returns a vector type, which must be
2167 // further processed to access the scalar result in element 0.
2168 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
2169                                             SelectionDAG &DAG) const {
2170   SDLoc DL(Op);
2171   assert(Op.getValueType().isSimple() &&
2172          Op.getOperand(0).getValueType().isSimple() &&
2173          "Unexpected vector-reduce lowering");
2174   MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
2175   unsigned RVVOpcode;
2176   uint64_t IdentityVal;
2177   std::tie(RVVOpcode, IdentityVal) =
2178       getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
  // We have to perform a bit of a dance to get from our vector type to the
  // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the width
  // of the vector element type to find the type which fills a single
  // register. Be careful to use the operand's vector element type rather than
  // the reduction's value type, as the latter has likely been extended to
  // XLEN.
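  // For example, a reduction over nxv4i32 is performed in an LMUL=1 nxv2i32
  // register, since 64 / 32 = 2 elements fill the minimum-VLEN register.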
2184   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
2185   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
2186   SDValue IdentitySplat =
2187       DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
2188   SDValue Reduction =
2189       DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
2190   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2191                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2192   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
2193 }
2194 
2195 // Given a reduction op, this function returns the matching reduction opcode,
2196 // the vector SDValue and the scalar SDValue required to lower this to a
2197 // RISCVISD node.
2198 static std::tuple<unsigned, SDValue, SDValue>
2199 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
2200   SDLoc DL(Op);
2201   switch (Op.getOpcode()) {
2202   default:
2203     llvm_unreachable("Unhandled reduction");
2204   case ISD::VECREDUCE_FADD:
2205     return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0),
2206                            DAG.getConstantFP(0.0, DL, EltVT));
2207   case ISD::VECREDUCE_SEQ_FADD:
2208     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1),
2209                            Op.getOperand(0));
2210   }
2211 }
2212 
2213 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
2214                                               SelectionDAG &DAG) const {
2215   SDLoc DL(Op);
2216   MVT VecEltVT = Op.getSimpleValueType();
2217   // We have to perform a bit of a dance to get from our vector type to the
2218   // correct LMUL=1 vector type. See above for an explanation.
2219   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
2220   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
2221 
2222   unsigned RVVOpcode;
2223   SDValue VectorVal, ScalarVal;
2224   std::tie(RVVOpcode, VectorVal, ScalarVal) =
2225       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
2226 
2227   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
2228   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat);
2229   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2230                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2231 }
2232 
2233 static MVT getLMUL1VT(MVT VT) {
2234   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
2235          "Unexpected vector MVT");
2236   return MVT::getScalableVectorVT(
2237       VT.getVectorElementType(),
2238       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
2239 }
2240 
2241 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
2242                                                    SelectionDAG &DAG) const {
2243   SDValue Vec = Op.getOperand(0);
2244   SDValue SubVec = Op.getOperand(1);
2245   MVT VecVT = Vec.getSimpleValueType();
2246   MVT SubVecVT = SubVec.getSimpleValueType();
2247 
2248   // TODO: Only handle scalable->scalable inserts for now, and revisit this for
2249   // fixed-length vectors later.
2250   if (!SubVecVT.isScalableVector() || !VecVT.isScalableVector())
2251     return Op;
2252 
2253   SDLoc DL(Op);
2254   unsigned OrigIdx = Op.getConstantOperandVal(2);
2255   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2256 
2257   unsigned SubRegIdx, RemIdx;
2258   std::tie(SubRegIdx, RemIdx) =
2259       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2260           VecVT, SubVecVT, OrigIdx, TRI);
2261 
2262   RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
2263   bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
2264                          SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
2265                          SubVecLMUL == RISCVVLMUL::LMUL_F8;
2266 
2267   // If the Idx has been completely eliminated and this subvector's size is a
2268   // vector register or a multiple thereof, or the surrounding elements are
2269   // undef, then this is a subvector insert which naturally aligns to a vector
2270   // register. These can easily be handled using subregister manipulation.
2271   // If the subvector is smaller than a vector register, then the insertion
2272   // must preserve the undisturbed elements of the register. We do this by
2273   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
2274   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
2275   // subvector within the vector register, and an INSERT_SUBVECTOR of that
2276   // LMUL=1 type back into the larger vector (resolving to another subregister
2277   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
2278   // to avoid allocating a large register group to hold our subvector.
2279   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
2280     return Op;
2281 
  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
2284   // (in our case undisturbed). This means we can set up a subvector insertion
2285   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
2286   // size of the subvector.
2287   MVT XLenVT = Subtarget.getXLenVT();
2288   MVT InterSubVT = getLMUL1VT(VecVT);
2289 
2290   // Extract a subvector equal to the nearest full vector register type. This
2291   // should resolve to a EXTRACT_SUBREG instruction.
2292   unsigned AlignedIdx = OrigIdx - RemIdx;
2293   SDValue AlignedExtract =
2294       DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
2295                   DAG.getConstant(AlignedIdx, DL, XLenVT));
2296 
2297   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
2298   // For scalable vectors this must be further multiplied by vscale.
2299   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
2300 
2301   SDValue Mask, VL;
2302   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2303 
2304   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
2305   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
2306   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
2307   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
2308 
2309   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
2310                        DAG.getUNDEF(InterSubVT), SubVec,
2311                        DAG.getConstant(0, DL, XLenVT));
2312 
2313   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
2314                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
2315 
2316   // Insert this subvector into the correct vector register. This should
2317   // resolve to an INSERT_SUBREG instruction.
2318   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
2319                      DAG.getConstant(AlignedIdx, DL, XLenVT));
2320 }
2321 
2322 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
2323                                                     SelectionDAG &DAG) const {
2324   SDValue Vec = Op.getOperand(0);
2325   MVT SubVecVT = Op.getSimpleValueType();
2326   MVT VecVT = Vec.getSimpleValueType();
2327 
2328   // TODO: Only handle scalable->scalable extracts for now, and revisit this
2329   // for fixed-length vectors later.
2330   if (!SubVecVT.isScalableVector() || !VecVT.isScalableVector())
2331     return Op;
2332 
2333   SDLoc DL(Op);
2334   unsigned OrigIdx = Op.getConstantOperandVal(1);
2335   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2336 
2337   unsigned SubRegIdx, RemIdx;
2338   std::tie(SubRegIdx, RemIdx) =
2339       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2340           VecVT, SubVecVT, OrigIdx, TRI);
2341 
2342   // If the Idx has been completely eliminated then this is a subvector extract
2343   // which naturally aligns to a vector register. These can easily be handled
2344   // using subregister manipulation.
2345   if (RemIdx == 0)
2346     return Op;
2347 
2348   // Else we must shift our vector register directly to extract the subvector.
2349   // Do this using VSLIDEDOWN.
2350   MVT XLenVT = Subtarget.getXLenVT();
2351 
2352   // Extract a subvector equal to the nearest full vector register type. This
2353   // should resolve to a EXTRACT_SUBREG instruction.
2354   unsigned AlignedIdx = OrigIdx - RemIdx;
2355   MVT InterSubVT = getLMUL1VT(VecVT);
2356   SDValue AlignedExtract =
2357       DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
2358                   DAG.getConstant(AlignedIdx, DL, XLenVT));
2359 
2360   // Slide this vector register down by the desired number of elements in order
2361   // to place the desired subvector starting at element 0.
2362   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
2363   // For scalable vectors this must be further multiplied by vscale.
2364   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
2365 
2366   SDValue Mask, VL;
2367   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
2368   SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
2369                                   DAG.getUNDEF(InterSubVT), AlignedExtract,
2370                                   SlidedownAmt, Mask, VL);
2371 
2372   // Now the vector is in the right position, extract our final subvector. This
2373   // should resolve to a COPY.
2374   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2375                      DAG.getConstant(0, DL, XLenVT));
2376 }
2377 
2378 SDValue
2379 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
2380                                                      SelectionDAG &DAG) const {
2381   auto *Load = cast<LoadSDNode>(Op);
2382 
2383   SDLoc DL(Op);
2384   MVT VT = Op.getSimpleValueType();
2385   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2386 
2387   SDValue VL =
2388       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2389 
2390   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2391   SDValue NewLoad = DAG.getMemIntrinsicNode(
2392       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
2393       Load->getMemoryVT(), Load->getMemOperand());
2394 
2395   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2396   return DAG.getMergeValues({Result, Load->getChain()}, DL);
2397 }
2398 
2399 SDValue
2400 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
2401                                                       SelectionDAG &DAG) const {
2402   auto *Store = cast<StoreSDNode>(Op);
2403 
2404   SDLoc DL(Op);
2405   MVT VT = Store->getValue().getSimpleValueType();
2406 
2407   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
2408   // This is tricky to do.
2409 
2410   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2411 
2412   SDValue VL =
2413       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2414 
2415   SDValue NewValue =
2416       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
2417   return DAG.getMemIntrinsicNode(
2418       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
2419       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
2420       Store->getMemoryVT(), Store->getMemOperand());
2421 }
2422 
2423 SDValue
2424 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
2425                                                       SelectionDAG &DAG) const {
2426   MVT InVT = Op.getOperand(0).getSimpleValueType();
2427   MVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT, Subtarget);
2428 
2429   MVT VT = Op.getSimpleValueType();
2430 
2431   SDValue Op1 =
2432       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2433   SDValue Op2 =
2434       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
2435 
2436   SDLoc DL(Op);
2437   SDValue VL =
2438       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2439 
2440   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2441 
2442   bool Invert = false;
2443   Optional<unsigned> LogicOpc;
2444   if (ContainerVT.isFloatingPoint()) {
2445     bool Swap = false;
2446     switch (CC) {
2447     default:
2448       break;
2449     case ISD::SETULE:
2450     case ISD::SETULT:
2451       Swap = true;
2452       LLVM_FALLTHROUGH;
2453     case ISD::SETUGE:
2454     case ISD::SETUGT:
2455       CC = getSetCCInverse(CC, ContainerVT);
2456       Invert = true;
2457       break;
2458     case ISD::SETOGE:
2459     case ISD::SETOGT:
2460     case ISD::SETGE:
2461     case ISD::SETGT:
2462       Swap = true;
2463       break;
2464     case ISD::SETUEQ:
2465       // Use !((OLT Op1, Op2) || (OLT Op2, Op1))
2466       Invert = true;
2467       LogicOpc = RISCVISD::VMOR_VL;
2468       CC = ISD::SETOLT;
2469       break;
2470     case ISD::SETONE:
2471       // Use ((OLT Op1, Op2) || (OLT Op2, Op1))
2472       LogicOpc = RISCVISD::VMOR_VL;
2473       CC = ISD::SETOLT;
2474       break;
2475     case ISD::SETO:
2476       // Use (OEQ Op1, Op1) && (OEQ Op2, Op2)
2477       LogicOpc = RISCVISD::VMAND_VL;
2478       CC = ISD::SETOEQ;
2479       break;
2480     case ISD::SETUO:
2481       // Use (UNE Op1, Op1) || (UNE Op2, Op2)
2482       LogicOpc = RISCVISD::VMOR_VL;
2483       CC = ISD::SETUNE;
2484       break;
2485     }
2486 
2487     if (Swap) {
2488       CC = getSetCCSwappedOperands(CC);
2489       std::swap(Op1, Op2);
2490     }
2491   }
2492 
2493   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2494   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2495 
  // There are 3 cases we need to handle.
2497   // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2)
2498   //    we need to compare each operand with itself.
2499   // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in
2500   //    both orders.
2501   // 3. For any other case we just need one compare with Op1 and Op2.
2502   SDValue Cmp;
2503   if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) {
2504     Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1,
2505                       DAG.getCondCode(CC), Mask, VL);
2506     SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2,
2507                                DAG.getCondCode(CC), Mask, VL);
2508     Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
2509   } else {
2510     Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
2511                       DAG.getCondCode(CC), Mask, VL);
2512     if (LogicOpc) {
2513       SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1,
2514                                  DAG.getCondCode(CC), Mask, VL);
2515       Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
2516     }
2517   }
2518 
2519   if (Invert) {
2520     SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2521     Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL);
2522   }
2523 
2524   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
2525 }
2526 
2527 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
2528     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
2529   MVT VT = Op.getSimpleValueType();
2530 
2531   if (VT.getVectorElementType() == MVT::i1)
2532     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
2533 
2534   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
2535 }
2536 
2537 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
2538     SDValue Op, SelectionDAG &DAG) const {
2539   MVT VT = Op.getSimpleValueType();
2540   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2541 
2542   MVT I1ContainerVT =
2543       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2544 
2545   SDValue CC =
2546       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
2547   SDValue Op1 =
2548       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
2549   SDValue Op2 =
2550       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
2551 
2552   SDLoc DL(Op);
2553   SDValue Mask, VL;
2554   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2555 
2556   SDValue Select =
2557       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
2558 
2559   return convertFromScalableVector(VT, Select, DAG, Subtarget);
2560 }
2561 
2562 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
2563                                                unsigned NewOpc,
2564                                                bool HasMask) const {
2565   MVT VT = Op.getSimpleValueType();
2566   assert(useRVVForFixedLengthVectorVT(VT) &&
2567          "Only expected to lower fixed length vector operation!");
2568   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2569 
2570   // Create list of operands by converting existing ones to scalable types.
2571   SmallVector<SDValue, 6> Ops;
2572   for (const SDValue &V : Op->op_values()) {
2573     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
2574 
2575     // Pass through non-vector operands.
2576     if (!V.getValueType().isVector()) {
2577       Ops.push_back(V);
2578       continue;
2579     }
2580 
2581     // "cast" fixed length vector to a scalable vector.
2582     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
2583            "Only fixed length vectors are supported!");
2584     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
2585   }
2586 
2587   SDLoc DL(Op);
2588   SDValue Mask, VL;
2589   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2590   if (HasMask)
2591     Ops.push_back(Mask);
2592   Ops.push_back(VL);
2593 
2594   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
2595   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
2596 }
2597 
2598 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2599 // form of the given Opcode.
2600 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
2601   switch (Opcode) {
2602   default:
2603     llvm_unreachable("Unexpected opcode");
2604   case ISD::SHL:
2605     return RISCVISD::SLLW;
2606   case ISD::SRA:
2607     return RISCVISD::SRAW;
2608   case ISD::SRL:
2609     return RISCVISD::SRLW;
2610   case ISD::SDIV:
2611     return RISCVISD::DIVW;
2612   case ISD::UDIV:
2613     return RISCVISD::DIVUW;
2614   case ISD::UREM:
2615     return RISCVISD::REMUW;
2616   case ISD::ROTL:
2617     return RISCVISD::ROLW;
2618   case ISD::ROTR:
2619     return RISCVISD::RORW;
2620   case RISCVISD::GREVI:
2621     return RISCVISD::GREVIW;
2622   case RISCVISD::GORCI:
2623     return RISCVISD::GORCIW;
2624   }
2625 }
2626 
2627 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
2628 // Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact that the operation was originally of type i32 is
// lost.
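// For example, an i32 SRA on RV64 is rewritten roughly as
//   (i32 (sra x, y))
//     -> (i32 (trunc (RISCVISD::SRAW (any_ext x), (any_ext y))))
// so that SRAW can still be selected even though i32 is not a legal type.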
2632 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
2633                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2634   SDLoc DL(N);
2635   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2636   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2637   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2638   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2639   // ReplaceNodeResults requires we maintain the same type for the return value.
2640   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2641 }
2642 
// Converts the given 32-bit operation to an i64 operation with sign extension
// semantics to reduce the number of sign extension instructions needed.
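// For example, an i32 ADD is rewritten roughly as
//   (i32 (add x, y))
//     -> (i32 (trunc (sext_inreg (add (any_ext x), (any_ext y)), i32)))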
2645 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2646   SDLoc DL(N);
2647   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2648   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2649   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2650   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2651                                DAG.getValueType(MVT::i32));
2652   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2653 }
2654 
2655 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
2656                                              SmallVectorImpl<SDValue> &Results,
2657                                              SelectionDAG &DAG) const {
2658   SDLoc DL(N);
2659   switch (N->getOpcode()) {
2660   default:
2661     llvm_unreachable("Don't know how to custom type legalize this operation!");
2662   case ISD::STRICT_FP_TO_SINT:
2663   case ISD::STRICT_FP_TO_UINT:
2664   case ISD::FP_TO_SINT:
2665   case ISD::FP_TO_UINT: {
2666     bool IsStrict = N->isStrictFPOpcode();
2667     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2668            "Unexpected custom legalisation");
2669     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
2670     // If the FP type needs to be softened, emit a library call using the 'si'
2671     // version. If we left it to default legalization we'd end up with 'di'. If
2672     // the FP type doesn't need to be softened just let generic type
2673     // legalization promote the result type.
2674     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
2675         TargetLowering::TypeSoftenFloat)
2676       return;
2677     RTLIB::Libcall LC;
2678     if (N->getOpcode() == ISD::FP_TO_SINT ||
2679         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
2680       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
2681     else
2682       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
2683     MakeLibCallOptions CallOptions;
2684     EVT OpVT = Op0.getValueType();
2685     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
2686     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
2687     SDValue Result;
2688     std::tie(Result, Chain) =
2689         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
2690     Results.push_back(Result);
2691     if (IsStrict)
2692       Results.push_back(Chain);
2693     break;
2694   }
2695   case ISD::READCYCLECOUNTER: {
2696     assert(!Subtarget.is64Bit() &&
2697            "READCYCLECOUNTER only has custom type legalization on riscv32");
2698 
2699     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
2700     SDValue RCW =
2701         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
2702 
2703     Results.push_back(
2704         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
2705     Results.push_back(RCW.getValue(2));
2706     break;
2707   }
2708   case ISD::ADD:
2709   case ISD::SUB:
2710   case ISD::MUL:
2711     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2712            "Unexpected custom legalisation");
2713     if (N->getOperand(1).getOpcode() == ISD::Constant)
2714       return;
2715     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2716     break;
2717   case ISD::SHL:
2718   case ISD::SRA:
2719   case ISD::SRL:
2720     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2721            "Unexpected custom legalisation");
2722     if (N->getOperand(1).getOpcode() == ISD::Constant)
2723       return;
2724     Results.push_back(customLegalizeToWOp(N, DAG));
2725     break;
2726   case ISD::ROTL:
2727   case ISD::ROTR:
2728     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2729            "Unexpected custom legalisation");
2730     Results.push_back(customLegalizeToWOp(N, DAG));
2731     break;
2732   case ISD::SDIV:
2733   case ISD::UDIV:
2734   case ISD::UREM: {
2735     MVT VT = N->getSimpleValueType(0);
2736     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
2737            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
2738            "Unexpected custom legalisation");
2739     if (N->getOperand(0).getOpcode() == ISD::Constant ||
2740         N->getOperand(1).getOpcode() == ISD::Constant)
2741       return;
2742 
2743     // If the input is i32, use ANY_EXTEND since the W instructions don't read
2744     // the upper 32 bits. For other types we need to sign or zero extend
2745     // based on the opcode.
2746     unsigned ExtOpc = ISD::ANY_EXTEND;
2747     if (VT != MVT::i32)
2748       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
2749                                            : ISD::ZERO_EXTEND;
2750 
2751     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
2752     break;
2753   }
2754   case ISD::BITCAST: {
2755     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2756              Subtarget.hasStdExtF()) ||
2757             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
2758            "Unexpected custom legalisation");
2759     SDValue Op0 = N->getOperand(0);
2760     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
2761       if (Op0.getValueType() != MVT::f16)
2762         return;
2763       SDValue FPConv =
2764           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
2765       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
2766     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2767                Subtarget.hasStdExtF()) {
2768       if (Op0.getValueType() != MVT::f32)
2769         return;
2770       SDValue FPConv =
2771           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
2772       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
2773     }
2774     break;
2775   }
2776   case RISCVISD::GREVI:
2777   case RISCVISD::GORCI: {
2778     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2779            "Unexpected custom legalisation");
2780     // This is similar to customLegalizeToWOp, except that we pass the second
2781     // operand (a TargetConstant) straight through: it is already of type
2782     // XLenVT.
2783     SDLoc DL(N);
2784     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2785     SDValue NewOp0 =
2786         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2787     SDValue NewRes =
2788         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
2789     // ReplaceNodeResults requires we maintain the same type for the return
2790     // value.
2791     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
2792     break;
2793   }
2794   case RISCVISD::SHFLI: {
2795     // There is no SHFLIW instruction, but we can just promote the operation.
2796     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2797            "Unexpected custom legalisation");
2798     SDLoc DL(N);
2799     SDValue NewOp0 =
2800         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2801     SDValue NewRes =
2802         DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
2803     // ReplaceNodeResults requires we maintain the same type for the return
2804     // value.
2805     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
2806     break;
2807   }
2808   case ISD::BSWAP:
2809   case ISD::BITREVERSE: {
2810     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2811            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2812     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2813                                  N->getOperand(0));
2814     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
2815     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
2816                                  DAG.getTargetConstant(Imm, DL,
2817                                                        Subtarget.getXLenVT()));
2818     // ReplaceNodeResults requires we maintain the same type for the return
2819     // value.
2820     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
2821     break;
2822   }
2823   case ISD::FSHL:
2824   case ISD::FSHR: {
2825     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2826            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
2827     SDValue NewOp0 =
2828         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2829     SDValue NewOp1 =
2830         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2831     SDValue NewOp2 =
2832         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
2833     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
2834     // Mask the shift amount to 5 bits.
2835     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
2836                          DAG.getConstant(0x1f, DL, MVT::i64));
2837     unsigned Opc =
2838         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
2839     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
2840     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
2841     break;
2842   }
2843   case ISD::EXTRACT_VECTOR_ELT: {
2844     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
2845     // type is illegal (currently only vXi64 RV32).
2846     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
2847     // transferred to the destination register. We issue two of these from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
2849     // first element.
2850     SDLoc DL(N);
2851     SDValue Vec = N->getOperand(0);
2852     SDValue Idx = N->getOperand(1);
2853     EVT VecVT = Vec.getValueType();
2854     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
2855            VecVT.getVectorElementType() == MVT::i64 &&
2856            "Unexpected EXTRACT_VECTOR_ELT legalization");
2857 
2858     if (!VecVT.isScalableVector())
2859       return;
2860 
2861     SDValue Slidedown = Vec;
2862     MVT XLenVT = Subtarget.getXLenVT();
2863     // Unless the index is known to be 0, we must slide the vector down to get
2864     // the desired element into index 0.
2865     if (!isNullConstant(Idx)) {
2866       SDValue Mask, VL;
2867       std::tie(Mask, VL) =
2868           getDefaultScalableVLOps(VecVT.getSimpleVT(), DL, DAG, Subtarget);
2869       Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT,
2870                               DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL);
2871     }
2872 
2873     // Extract the lower XLEN bits of the correct vector element.
2874     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
2875 
2876     // To extract the upper XLEN bits of the vector element, shift the first
2877     // element right by 32 bits and re-extract the lower XLEN bits.
2878     SDValue ThirtyTwoV =
2879         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
2880                     DAG.getConstant(32, DL, Subtarget.getXLenVT()));
2881     SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
2882 
2883     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
2884 
2885     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
2886     break;
2887   }
2888   case ISD::INTRINSIC_WO_CHAIN: {
2889     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2890     switch (IntNo) {
2891     default:
2892       llvm_unreachable(
2893           "Don't know how to custom type legalize this intrinsic!");
2894     case Intrinsic::riscv_vmv_x_s: {
2895       EVT VT = N->getValueType(0);
2896       assert((VT == MVT::i8 || VT == MVT::i16 ||
2897               (Subtarget.is64Bit() && VT == MVT::i32)) &&
2898              "Unexpected custom legalisation!");
2899       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
2900                                     Subtarget.getXLenVT(), N->getOperand(1));
2901       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
2902       break;
2903     }
2904     }
2905     break;
2906   }
2907   case ISD::VECREDUCE_ADD:
2908   case ISD::VECREDUCE_AND:
2909   case ISD::VECREDUCE_OR:
2910   case ISD::VECREDUCE_XOR:
2911   case ISD::VECREDUCE_SMAX:
2912   case ISD::VECREDUCE_UMAX:
2913   case ISD::VECREDUCE_SMIN:
2914   case ISD::VECREDUCE_UMIN:
2915     // The custom-lowering for these nodes returns a vector whose first element
2916     // is the result of the reduction. Extract its first element and let the
2917     // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
2918     Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
2919     break;
2920   }
2921 }
2922 
2923 // A structure to hold one of the bit-manipulation patterns below. Together, a
2924 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
2925 //   (or (and (shl x, 1), 0xAAAAAAAA),
2926 //       (and (srl x, 1), 0x55555555))
2927 struct RISCVBitmanipPat {
2928   SDValue Op;
2929   unsigned ShAmt;
2930   bool IsSHL;
2931 
2932   bool formsPairWith(const RISCVBitmanipPat &Other) const {
2933     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
2934   }
2935 };
2936 
2937 // Matches patterns of the form
2938 //   (and (shl x, C2), (C1 << C2))
2939 //   (and (srl x, C2), C1)
2940 //   (shl (and x, C1), C2)
2941 //   (srl (and x, (C1 << C2)), C2)
2942 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
2943 // The expected masks for each shift amount are specified in BitmanipMasks where
2944 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
2945 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
2946 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
2947 // XLen is 64.
2948 static Optional<RISCVBitmanipPat>
2949 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
2950   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
2951          "Unexpected number of masks");
2952   Optional<uint64_t> Mask;
2953   // Optionally consume a mask around the shift operation.
2954   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
2955     Mask = Op.getConstantOperandVal(1);
2956     Op = Op.getOperand(0);
2957   }
2958   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
2959     return None;
2960   bool IsSHL = Op.getOpcode() == ISD::SHL;
2961 
2962   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2963     return None;
2964   uint64_t ShAmt = Op.getConstantOperandVal(1);
2965 
2966   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
2968     return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL, so we're only allowed to shift 1/4 of the width. Since ShAmt
  // is a power of 2, rejecting ShAmt >= Width / 2 enforces this.
2971   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
2972     return None;
2973 
2974   SDValue Src = Op.getOperand(0);
2975 
2976   // The expected mask is shifted left when the AND is found around SHL
2977   // patterns.
2978   //   ((x >> 1) & 0x55555555)
2979   //   ((x << 1) & 0xAAAAAAAA)
2980   bool SHLExpMask = IsSHL;
2981 
2982   if (!Mask) {
2983     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
2984     // the mask is all ones: consume that now.
2985     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
2986       Mask = Src.getConstantOperandVal(1);
2987       Src = Src.getOperand(0);
2988       // The expected mask is now in fact shifted left for SRL, so reverse the
2989       // decision.
2990       //   ((x & 0xAAAAAAAA) >> 1)
2991       //   ((x & 0x55555555) << 1)
2992       SHLExpMask = !SHLExpMask;
2993     } else {
2994       // Use a default shifted mask of all-ones if there's no AND, truncated
2995       // down to the expected width. This simplifies the logic later on.
2996       Mask = maskTrailingOnes<uint64_t>(Width);
2997       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
2998     }
2999   }
3000 
3001   unsigned MaskIdx = Log2_32(ShAmt);
3002   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3003 
3004   if (SHLExpMask)
3005     ExpMask <<= ShAmt;
3006 
3007   if (Mask != ExpMask)
3008     return None;
3009 
3010   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
3011 }
3012 
3013 // Matches any of the following bit-manipulation patterns:
3014 //   (and (shl x, 1), (0x55555555 << 1))
3015 //   (and (srl x, 1), 0x55555555)
3016 //   (shl (and x, 0x55555555), 1)
3017 //   (srl (and x, (0x55555555 << 1)), 1)
3018 // where the shift amount and mask may vary thus:
3019 //   [1]  = 0x55555555 / 0xAAAAAAAA
3020 //   [2]  = 0x33333333 / 0xCCCCCCCC
3021 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
3022 //   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
3024 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
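// For example, for an i32 value, (and (shl x, 4), 0xF0F0F0F0) matches with
// ShAmt == 4 and IsSHL == true.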
3025 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
3026   // These are the unshifted masks which we use to match bit-manipulation
3027   // patterns. They may be shifted left in certain circumstances.
3028   static const uint64_t BitmanipMasks[] = {
3029       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
3030       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
3031 
3032   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3033 }
3034 
3035 // Match the following pattern as a GREVI(W) operation
3036 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
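// For example, on a 32-bit value:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
//   -> (GREVI x, 1)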
3037 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
3038                                const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3040   EVT VT = Op.getValueType();
3041 
3042   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3043     auto LHS = matchGREVIPat(Op.getOperand(0));
3044     auto RHS = matchGREVIPat(Op.getOperand(1));
3045     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
3046       SDLoc DL(Op);
3047       return DAG.getNode(
3048           RISCVISD::GREVI, DL, VT, LHS->Op,
3049           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3050     }
3051   }
3052   return SDValue();
3053 }
3054 
// Matches any of the following patterns as a GORCI(W) operation
3056 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
3057 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
3058 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
3059 // Note that with the variant of 3.,
3060 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
3061 // the inner pattern will first be matched as GREVI and then the outer
3062 // pattern will be matched to GORC via the first rule above.
3063 // 4.  (or (rotl/rotr x, bitwidth/2), x)
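// For example, for an i32 value, (or (rotl x, 16), x) is combined to
// (GORCI x, 16) via rule 4.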
3064 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
3065                                const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3067   EVT VT = Op.getValueType();
3068 
3069   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3070     SDLoc DL(Op);
3071     SDValue Op0 = Op.getOperand(0);
3072     SDValue Op1 = Op.getOperand(1);
3073 
3074     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
3075       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
3076           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
3077         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
3078       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
3079       if ((Reverse.getOpcode() == ISD::ROTL ||
3080            Reverse.getOpcode() == ISD::ROTR) &&
3081           Reverse.getOperand(0) == X &&
3082           isa<ConstantSDNode>(Reverse.getOperand(1))) {
3083         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
3084         if (RotAmt == (VT.getSizeInBits() / 2))
3085           return DAG.getNode(
3086               RISCVISD::GORCI, DL, VT, X,
3087               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
3088       }
3089       return SDValue();
3090     };
3091 
3092     // Check for either commutable permutation of (or (GREVI x, shamt), x)
3093     if (SDValue V = MatchOROfReverse(Op0, Op1))
3094       return V;
3095     if (SDValue V = MatchOROfReverse(Op1, Op0))
3096       return V;
3097 
3098     // OR is commutable so canonicalize its OR operand to the left
3099     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
3100       std::swap(Op0, Op1);
3101     if (Op0.getOpcode() != ISD::OR)
3102       return SDValue();
3103     SDValue OrOp0 = Op0.getOperand(0);
3104     SDValue OrOp1 = Op0.getOperand(1);
3105     auto LHS = matchGREVIPat(OrOp0);
3106     // OR is commutable so swap the operands and try again: x might have been
3107     // on the left
3108     if (!LHS) {
3109       std::swap(OrOp0, OrOp1);
3110       LHS = matchGREVIPat(OrOp0);
3111     }
3112     auto RHS = matchGREVIPat(Op1);
3113     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
3114       return DAG.getNode(
3115           RISCVISD::GORCI, DL, VT, LHS->Op,
3116           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3117     }
3118   }
3119   return SDValue();
3120 }
3121 
3122 // Matches any of the following bit-manipulation patterns:
3123 //   (and (shl x, 1), (0x22222222 << 1))
3124 //   (and (srl x, 1), 0x22222222)
3125 //   (shl (and x, 0x22222222), 1)
3126 //   (srl (and x, (0x22222222 << 1)), 1)
3127 // where the shift amount and mask may vary thus:
3128 //   [1]  = 0x22222222 / 0x44444444
//   [2]  = 0x0C0C0C0C / 0x30303030
3130 //   [4]  = 0x00F000F0 / 0x0F000F00
3131 //   [8]  = 0x0000FF00 / 0x00FF0000
3132 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
3133 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
3134   // These are the unshifted masks which we use to match bit-manipulation
3135   // patterns. They may be shifted left in certain circumstances.
3136   static const uint64_t BitmanipMasks[] = {
3137       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
3138       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
3139 
3140   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3141 }
3142 
// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
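// For example, on a 32-bit value:
//   (or (or (and (shl x, 1), 0x44444444),
//           (and (srl x, 1), 0x22222222)),
//       (and x, 0x99999999))
//   -> (SHFLI x, 1)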
3144 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
3145                                const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3147   EVT VT = Op.getValueType();
3148 
3149   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
3150     return SDValue();
3151 
3152   SDValue Op0 = Op.getOperand(0);
3153   SDValue Op1 = Op.getOperand(1);
3154 
  // OR is commutable, so canonicalize the inner OR to the LHS (Op0).
3156   if (Op0.getOpcode() != ISD::OR)
3157     std::swap(Op0, Op1);
3158   if (Op0.getOpcode() != ISD::OR)
3159     return SDValue();
3160 
3161   // We found an inner OR, so our operands are the operands of the inner OR
3162   // and the other operand of the outer OR.
3163   SDValue A = Op0.getOperand(0);
3164   SDValue B = Op0.getOperand(1);
3165   SDValue C = Op1;
3166 
3167   auto Match1 = matchSHFLPat(A);
3168   auto Match2 = matchSHFLPat(B);
3169 
3170   // If neither matched, we failed.
3171   if (!Match1 && !Match2)
3172     return SDValue();
3173 
  // We had at least one match. If one failed, try the remaining C operand.
3175   if (!Match1) {
3176     std::swap(A, C);
3177     Match1 = matchSHFLPat(A);
3178     if (!Match1)
3179       return SDValue();
3180   } else if (!Match2) {
3181     std::swap(B, C);
3182     Match2 = matchSHFLPat(B);
3183     if (!Match2)
3184       return SDValue();
3185   }
3186   assert(Match1 && Match2);
3187 
3188   // Make sure our matches pair up.
3189   if (!Match1->formsPairWith(*Match2))
3190     return SDValue();
3191 
  // All that remains is to make sure C is an AND with the same input that
  // masks out the bits that are being shuffled.
3194   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
3195       C.getOperand(0) != Match1->Op)
3196     return SDValue();
3197 
3198   uint64_t Mask = C.getConstantOperandVal(1);
3199 
3200   static const uint64_t BitmanipMasks[] = {
3201       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
3202       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
3203   };
3204 
3205   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
3206   unsigned MaskIdx = Log2_32(Match1->ShAmt);
3207   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3208 
3209   if (Mask != ExpMask)
3210     return SDValue();
3211 
3212   SDLoc DL(Op);
3213   return DAG.getNode(
3214       RISCVISD::SHFLI, DL, VT, Match1->Op,
3215       DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
3216 }
3217 
3218 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
3219 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated stage does
// not undo itself, but it is redundant.
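// For example:
//   (GREVI (GREVI x, 1), 2) -> (GREVI x, 3)
//   (GREVI (GREVI x, 2), 2) -> x
//   (GORCI (GORCI x, 1), 2) -> (GORCI x, 3)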
3222 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
3223   unsigned ShAmt1 = N->getConstantOperandVal(1);
3224   SDValue Src = N->getOperand(0);
3225 
3226   if (Src.getOpcode() != N->getOpcode())
3227     return SDValue();
3228 
3229   unsigned ShAmt2 = Src.getConstantOperandVal(1);
3230   Src = Src.getOperand(0);
3231 
3232   unsigned CombinedShAmt;
3233   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
3234     CombinedShAmt = ShAmt1 | ShAmt2;
3235   else
3236     CombinedShAmt = ShAmt1 ^ ShAmt2;
3237 
3238   if (CombinedShAmt == 0)
3239     return Src;
3240 
3241   SDLoc DL(N);
3242   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
3243                      DAG.getTargetConstant(CombinedShAmt, DL,
3244                                            N->getOperand(1).getValueType()));
3245 }
3246 
3247 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
3248                                                DAGCombinerInfo &DCI) const {
3249   SelectionDAG &DAG = DCI.DAG;
3250 
3251   switch (N->getOpcode()) {
3252   default:
3253     break;
3254   case RISCVISD::SplitF64: {
3255     SDValue Op0 = N->getOperand(0);
3256     // If the input to SplitF64 is just BuildPairF64 then the operation is
3257     // redundant. Instead, use BuildPairF64's operands directly.
3258     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
3259       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
3260 
3261     SDLoc DL(N);
3262 
3263     // It's cheaper to materialise two 32-bit integers than to load a double
3264     // from the constant pool and transfer it to integer registers through the
3265     // stack.
3266     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
3267       APInt V = C->getValueAPF().bitcastToAPInt();
3268       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
3269       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
3270       return DCI.CombineTo(N, Lo, Hi);
3271     }
3272 
3273     // This is a target-specific version of a DAGCombine performed in
3274     // DAGCombiner::visitBITCAST. It performs the equivalent of:
3275     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
3276     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
3277     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
3278         !Op0.getNode()->hasOneUse())
3279       break;
3280     SDValue NewSplitF64 =
3281         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
3282                     Op0.getOperand(0));
3283     SDValue Lo = NewSplitF64.getValue(0);
3284     SDValue Hi = NewSplitF64.getValue(1);
3285     APInt SignBit = APInt::getSignMask(32);
3286     if (Op0.getOpcode() == ISD::FNEG) {
3287       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
3288                                   DAG.getConstant(SignBit, DL, MVT::i32));
3289       return DCI.CombineTo(N, Lo, NewHi);
3290     }
3291     assert(Op0.getOpcode() == ISD::FABS);
3292     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
3293                                 DAG.getConstant(~SignBit, DL, MVT::i32));
3294     return DCI.CombineTo(N, Lo, NewHi);
3295   }
3296   case RISCVISD::SLLW:
3297   case RISCVISD::SRAW:
3298   case RISCVISD::SRLW:
3299   case RISCVISD::ROLW:
3300   case RISCVISD::RORW: {
3301     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
3302     SDValue LHS = N->getOperand(0);
3303     SDValue RHS = N->getOperand(1);
3304     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
3305     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
3306     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
3307         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
3308       if (N->getOpcode() != ISD::DELETED_NODE)
3309         DCI.AddToWorklist(N);
3310       return SDValue(N, 0);
3311     }
3312     break;
3313   }
3314   case RISCVISD::FSL:
3315   case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
3317     SDValue ShAmt = N->getOperand(2);
3318     unsigned BitWidth = ShAmt.getValueSizeInBits();
3319     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
3320     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
3321     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
3322       if (N->getOpcode() != ISD::DELETED_NODE)
3323         DCI.AddToWorklist(N);
3324       return SDValue(N, 0);
3325     }
3326     break;
3327   }
3328   case RISCVISD::FSLW:
3329   case RISCVISD::FSRW: {
    // Only the lower 32 bits of the values and the lower 6 bits of the shift
    // amount are read.
3332     SDValue Op0 = N->getOperand(0);
3333     SDValue Op1 = N->getOperand(1);
3334     SDValue ShAmt = N->getOperand(2);
3335     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
3336     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
3337     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
3338         SimplifyDemandedBits(Op1, OpMask, DCI) ||
3339         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
3340       if (N->getOpcode() != ISD::DELETED_NODE)
3341         DCI.AddToWorklist(N);
3342       return SDValue(N, 0);
3343     }
3344     break;
3345   }
3346   case RISCVISD::GREVIW:
3347   case RISCVISD::GORCIW: {
3348     // Only the lower 32 bits of the first operand are read
3349     SDValue Op0 = N->getOperand(0);
3350     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
3351     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
3352       if (N->getOpcode() != ISD::DELETED_NODE)
3353         DCI.AddToWorklist(N);
3354       return SDValue(N, 0);
3355     }
3356 
3357     return combineGREVI_GORCI(N, DCI.DAG);
3358   }
3359   case RISCVISD::FMV_X_ANYEXTW_RV64: {
3360     SDLoc DL(N);
3361     SDValue Op0 = N->getOperand(0);
3362     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
3363     // conversion is unnecessary and can be replaced with an ANY_EXTEND
3364     // of the FMV_W_X_RV64 operand.
3365     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
3366       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
3367              "Unexpected value type!");
3368       return Op0.getOperand(0);
3369     }
3370 
3371     // This is a target-specific version of a DAGCombine performed in
3372     // DAGCombiner::visitBITCAST. It performs the equivalent of:
3373     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
3374     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
3375     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
3376         !Op0.getNode()->hasOneUse())
3377       break;
3378     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
3379                                  Op0.getOperand(0));
3380     APInt SignBit = APInt::getSignMask(32).sext(64);
3381     if (Op0.getOpcode() == ISD::FNEG)
3382       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
3383                          DAG.getConstant(SignBit, DL, MVT::i64));
3384 
3385     assert(Op0.getOpcode() == ISD::FABS);
3386     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
3387                        DAG.getConstant(~SignBit, DL, MVT::i64));
3388   }
3389   case RISCVISD::GREVI:
3390   case RISCVISD::GORCI:
3391     return combineGREVI_GORCI(N, DCI.DAG);
3392   case ISD::OR:
3393     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
3394       return GREV;
3395     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
3396       return GORC;
3397     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
3398       return SHFL;
3399     break;
3400   case RISCVISD::SELECT_CC: {
3401     // Transform
3402     // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
3403     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
3404     // This can occur when legalizing some floating point comparisons.
3405     SDValue LHS = N->getOperand(0);
3406     SDValue RHS = N->getOperand(1);
3407     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
3408     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
3409     if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
3410         LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
3411         DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
3412       SDLoc DL(N);
3413       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
3414       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
3415       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
3416                          {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
3417                           N->getOperand(4)});
3418     }
3419     break;
3420   }
3421   case ISD::SETCC: {
3422     // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
3423     // Comparing with 0 may allow us to fold into bnez/beqz.
3424     SDValue LHS = N->getOperand(0);
3425     SDValue RHS = N->getOperand(1);
3426     if (LHS.getValueType().isScalableVector())
3427       break;
3428     auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3429     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
3430     if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
3431         DAG.MaskedValueIsZero(LHS, Mask)) {
3432       SDLoc DL(N);
3433       SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
3434       CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3435       return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
3436     }
3437     break;
3438   }
3439   case ISD::FCOPYSIGN: {
3440     EVT VT = N->getValueType(0);
3441     if (!VT.isVector())
3442       break;
3443     // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try to bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
    // TRUNC=1.
3447     SDValue In2 = N->getOperand(1);
3448     // Avoid cases where the extend/round has multiple uses, as duplicating
3449     // those is typically more expensive than removing a fneg.
3450     if (!In2.hasOneUse())
3451       break;
3452     if (In2.getOpcode() != ISD::FP_EXTEND &&
3453         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
3454       break;
3455     In2 = In2.getOperand(0);
3456     if (In2.getOpcode() != ISD::FNEG)
3457       break;
3458     SDLoc DL(N);
3459     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
3460     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
3461                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
3462   }
3463   }
3464 
3465   return SDValue();
3466 }
3467 
3468 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
3469     const SDNode *N, CombineLevel Level) const {
3470   // The following folds are only desirable if `(OP _, c1 << c2)` can be
3471   // materialised in fewer instructions than `(OP _, c1)`:
3472   //
3473   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
3474   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
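  // For example, for (shl (add x, 0x200), 2) the combine is rejected: the
  // shifted constant 0x800 no longer fits in an ADDI immediate, while 0x200
  // does.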
3475   SDValue N0 = N->getOperand(0);
3476   EVT Ty = N0.getValueType();
3477   if (Ty.isScalarInteger() &&
3478       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
3479     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3480     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
3481     if (C1 && C2) {
3482       const APInt &C1Int = C1->getAPIntValue();
3483       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
3484 
3485       // We can materialise `c1 << c2` into an add immediate, so it's "free",
3486       // and the combine should happen, to potentially allow further combines
3487       // later.
3488       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
3489           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
3490         return true;
3491 
3492       // We can materialise `c1` in an add immediate, so it's "free", and the
3493       // combine should be prevented.
3494       if (C1Int.getMinSignedBits() <= 64 &&
3495           isLegalAddImmediate(C1Int.getSExtValue()))
3496         return false;
3497 
3498       // Neither constant will fit into an immediate, so find materialisation
3499       // costs.
3500       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
3501                                               Subtarget.is64Bit());
3502       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
3503           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
3504 
3505       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
3506       // combine should be prevented.
3507       if (C1Cost < ShiftedC1Cost)
3508         return false;
3509     }
3510   }
3511   return true;
3512 }
3513 
3514 bool RISCVTargetLowering::targetShrinkDemandedConstant(
3515     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3516     TargetLoweringOpt &TLO) const {
3517   // Delay this optimization as late as possible.
3518   if (!TLO.LegalOps)
3519     return false;
3520 
3521   EVT VT = Op.getValueType();
3522   if (VT.isVector())
3523     return false;
3524 
3525   // Only handle AND for now.
3526   if (Op.getOpcode() != ISD::AND)
3527     return false;
3528 
3529   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3530   if (!C)
3531     return false;
3532 
3533   const APInt &Mask = C->getAPIntValue();
3534 
3535   // Clear all non-demanded bits initially.
3536   APInt ShrunkMask = Mask & DemandedBits;
3537 
3538   // If the shrunk mask fits in sign extended 12 bits, let the target
3539   // independent code apply it.
3540   if (ShrunkMask.isSignedIntN(12))
3541     return false;
3542 
3543   // Try to make a smaller immediate by setting undemanded bits.
3544 
3545   // We need to be able to make a negative number through a combination of mask
3546   // and undemanded bits.
3547   APInt ExpandedMask = Mask | ~DemandedBits;
3548   if (!ExpandedMask.isNegative())
3549     return false;
3550 
  // The minimum number of bits needed to represent the negative number.
3552   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
3553 
3554   // Try to make a 12 bit negative immediate. If that fails try to make a 32
3555   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
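  // For example (on RV64): with Mask == 0xfffffff0 and only the low 32 bits
  // demanded, the shrunk mask 0xfffffff0 does not fit in 12 bits, but setting
  // the undemanded upper bits gives 0xfffffffffffffff0 (-16), which does fit
  // in an ANDI immediate.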
3556   APInt NewMask = ShrunkMask;
3557   if (MinSignedBits <= 12)
3558     NewMask.setBitsFrom(11);
3559   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
3560     NewMask.setBitsFrom(31);
3561   else
3562     return false;
3563 
  // Sanity check that the new mask is a subset of the expanded mask (the
  // original mask plus undemanded bits).
3565   assert(NewMask.isSubsetOf(ExpandedMask));
3566 
3567   // If we aren't changing the mask, just return true to keep it and prevent
3568   // the caller from optimizing.
3569   if (NewMask == Mask)
3570     return true;
3571 
3572   // Replace the constant with the new mask.
3573   SDLoc DL(Op);
3574   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
3575   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
3576   return TLO.CombineTo(Op, NewOp);
3577 }
3578 
3579 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3580                                                         KnownBits &Known,
3581                                                         const APInt &DemandedElts,
3582                                                         const SelectionDAG &DAG,
3583                                                         unsigned Depth) const {
3584   unsigned BitWidth = Known.getBitWidth();
3585   unsigned Opc = Op.getOpcode();
3586   assert((Opc >= ISD::BUILTIN_OP_END ||
3587           Opc == ISD::INTRINSIC_WO_CHAIN ||
3588           Opc == ISD::INTRINSIC_W_CHAIN ||
3589           Opc == ISD::INTRINSIC_VOID) &&
3590          "Should use MaskedValueIsZero if you don't know whether Op"
3591          " is a target node!");
3592 
3593   Known.resetAll();
3594   switch (Opc) {
3595   default: break;
3596   case RISCVISD::REMUW: {
3597     KnownBits Known2;
3598     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3599     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3600     // We only care about the lower 32 bits.
3601     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
3602     // Restore the original width by sign extending.
3603     Known = Known.sext(BitWidth);
3604     break;
3605   }
3606   case RISCVISD::DIVUW: {
3607     KnownBits Known2;
3608     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3609     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3610     // We only care about the lower 32 bits.
3611     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
3612     // Restore the original width by sign extending.
3613     Known = Known.sext(BitWidth);
3614     break;
3615   }
3616   case RISCVISD::READ_VLENB:
3617     // We assume VLENB is at least 8 bytes.
3618     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
3619     Known.Zero.setLowBits(3);
3620     break;
3621   }
3622 }
3623 
3624 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
3625     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3626     unsigned Depth) const {
3627   switch (Op.getOpcode()) {
3628   default:
3629     break;
3630   case RISCVISD::SLLW:
3631   case RISCVISD::SRAW:
3632   case RISCVISD::SRLW:
3633   case RISCVISD::DIVW:
3634   case RISCVISD::DIVUW:
3635   case RISCVISD::REMUW:
3636   case RISCVISD::ROLW:
3637   case RISCVISD::RORW:
3638   case RISCVISD::GREVIW:
3639   case RISCVISD::GORCIW:
3640   case RISCVISD::FSLW:
3641   case RISCVISD::FSRW:
3642     // TODO: As the result is sign-extended, this is conservatively correct. A
3643     // more precise answer could be calculated for SRAW depending on known
3644     // bits in the shift amount.
3645     return 33;
3646   case RISCVISD::SHFLI: {
    // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // they will stay within the upper 32 bits. If there were more than 32
    // sign bits before, there will be at least 33 sign bits after.
3651     if (Op.getValueType() == MVT::i64 &&
3652         (Op.getConstantOperandVal(1) & 0x10) == 0) {
3653       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
3654       if (Tmp > 32)
3655         return 33;
3656     }
3657     break;
3658   }
3659   case RISCVISD::VMV_X_S:
3660     // The number of sign bits of the scalar result is computed by obtaining the
3661     // element type of the input vector operand, subtracting its width from the
3662     // XLEN, and then adding one (sign bit within the element type). If the
3663     // element type is wider than XLen, the least-significant XLEN bits are
3664     // taken.
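    // For example, extracting from a vector of i16 elements on RV64 gives
    // 64 - 16 + 1 = 49 sign bits.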
3665     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
3666       return 1;
3667     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
3668   }
3669 
3670   return 1;
3671 }
3672 
3673 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
3674                                                   MachineBasicBlock *BB) {
3675   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
3676 
3677   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
3678   // Should the count have wrapped while it was being read, we need to try
3679   // again.
3680   // ...
3681   // read:
3682   // rdcycleh x3 # load high word of cycle
3683   // rdcycle  x2 # load low word of cycle
3684   // rdcycleh x4 # load high word of cycle
3685   // bne x3, x4, read # check if high word reads match, otherwise try again
3686   // ...
3687 
3688   MachineFunction &MF = *BB->getParent();
3689   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3690   MachineFunction::iterator It = ++BB->getIterator();
3691 
3692   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
3693   MF.insert(It, LoopMBB);
3694 
3695   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
3696   MF.insert(It, DoneMBB);
3697 
3698   // Transfer the remainder of BB and its successor edges to DoneMBB.
3699   DoneMBB->splice(DoneMBB->begin(), BB,
3700                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
3701   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
3702 
3703   BB->addSuccessor(LoopMBB);
3704 
3705   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3706   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3707   Register LoReg = MI.getOperand(0).getReg();
3708   Register HiReg = MI.getOperand(1).getReg();
3709   DebugLoc DL = MI.getDebugLoc();
3710 
3711   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3712   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
3713       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
3714       .addReg(RISCV::X0);
3715   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
3716       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
3717       .addReg(RISCV::X0);
3718   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
3719       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
3720       .addReg(RISCV::X0);
3721 
3722   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
3723       .addReg(HiReg)
3724       .addReg(ReadAgainReg)
3725       .addMBB(LoopMBB);
3726 
3727   LoopMBB->addSuccessor(LoopMBB);
3728   LoopMBB->addSuccessor(DoneMBB);
3729 
3730   MI.eraseFromParent();
3731 
3732   return DoneMBB;
3733 }
3734 
3735 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
3736                                              MachineBasicBlock *BB) {
3737   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
3738 
3739   MachineFunction &MF = *BB->getParent();
3740   DebugLoc DL = MI.getDebugLoc();
3741   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3742   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3743   Register LoReg = MI.getOperand(0).getReg();
3744   Register HiReg = MI.getOperand(1).getReg();
3745   Register SrcReg = MI.getOperand(2).getReg();
3746   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
3747   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3748 
3749   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
3750                           RI);
3751   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3752   MachineMemOperand *MMOLo =
3753       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
3754   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3755       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
3756   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
3757       .addFrameIndex(FI)
3758       .addImm(0)
3759       .addMemOperand(MMOLo);
3760   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
3761       .addFrameIndex(FI)
3762       .addImm(4)
3763       .addMemOperand(MMOHi);
3764   MI.eraseFromParent(); // The pseudo instruction is gone now.
3765   return BB;
3766 }
3767 
3768 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
3769                                                  MachineBasicBlock *BB) {
3770   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
3771          "Unexpected instruction");
3772 
3773   MachineFunction &MF = *BB->getParent();
3774   DebugLoc DL = MI.getDebugLoc();
3775   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3776   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3777   Register DstReg = MI.getOperand(0).getReg();
3778   Register LoReg = MI.getOperand(1).getReg();
3779   Register HiReg = MI.getOperand(2).getReg();
3780   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
3781   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3782 
3783   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3784   MachineMemOperand *MMOLo =
3785       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
3786   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3787       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
3788   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3789       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
3790       .addFrameIndex(FI)
3791       .addImm(0)
3792       .addMemOperand(MMOLo);
3793   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3794       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
3795       .addFrameIndex(FI)
3796       .addImm(4)
3797       .addMemOperand(MMOHi);
3798   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
3799   MI.eraseFromParent(); // The pseudo instruction is gone now.
3800   return BB;
3801 }
3802 
3803 static bool isSelectPseudo(MachineInstr &MI) {
3804   switch (MI.getOpcode()) {
3805   default:
3806     return false;
3807   case RISCV::Select_GPR_Using_CC_GPR:
3808   case RISCV::Select_FPR16_Using_CC_GPR:
3809   case RISCV::Select_FPR32_Using_CC_GPR:
3810   case RISCV::Select_FPR64_Using_CC_GPR:
3811     return true;
3812   }
3813 }
3814 
3815 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
3816                                            MachineBasicBlock *BB) {
3817   // To "insert" Select_* instructions, we actually have to insert the triangle
3818   // control-flow pattern.  The incoming instructions know the destination vreg
3819   // to set, the condition code register to branch on, the true/false values to
3820   // select between, and the condcode to use to select the appropriate branch.
3821   //
3822   // We produce the following control flow:
3823   //     HeadMBB
3824   //     |  \
3825   //     |  IfFalseMBB
3826   //     | /
3827   //    TailMBB
3828   //
3829   // When we find a sequence of selects we attempt to optimize their emission
3830   // by sharing the control flow. Currently we only handle cases where we have
3831   // multiple selects with the exact same condition (same LHS, RHS and CC).
3832   // The selects may be interleaved with other instructions if the other
3833   // instructions meet some requirements we deem safe:
3834   // - They are debug instructions. Otherwise,
3835   // - They do not have side-effects, do not access memory and their inputs do
3836   //   not depend on the results of the select pseudo-instructions.
3837   // The TrueV/FalseV operands of the selects cannot depend on the result of
3838   // previous selects in the sequence.
3839   // These conditions could be further relaxed. See the X86 target for a
3840   // related approach and more information.
3841   Register LHS = MI.getOperand(1).getReg();
3842   Register RHS = MI.getOperand(2).getReg();
3843   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
3844 
3845   SmallVector<MachineInstr *, 4> SelectDebugValues;
3846   SmallSet<Register, 4> SelectDests;
3847   SelectDests.insert(MI.getOperand(0).getReg());
3848 
3849   MachineInstr *LastSelectPseudo = &MI;
3850 
3851   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
3852        SequenceMBBI != E; ++SequenceMBBI) {
3853     if (SequenceMBBI->isDebugInstr())
3854       continue;
3855     else if (isSelectPseudo(*SequenceMBBI)) {
3856       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
3857           SequenceMBBI->getOperand(2).getReg() != RHS ||
3858           SequenceMBBI->getOperand(3).getImm() != CC ||
3859           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
3860           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
3861         break;
3862       LastSelectPseudo = &*SequenceMBBI;
3863       SequenceMBBI->collectDebugValues(SelectDebugValues);
3864       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
3865     } else {
3866       if (SequenceMBBI->hasUnmodeledSideEffects() ||
3867           SequenceMBBI->mayLoadOrStore())
3868         break;
3869       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
3870             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
3871           }))
3872         break;
3873     }
3874   }
3875 
3876   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
3877   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3878   DebugLoc DL = MI.getDebugLoc();
3879   MachineFunction::iterator I = ++BB->getIterator();
3880 
3881   MachineBasicBlock *HeadMBB = BB;
3882   MachineFunction *F = BB->getParent();
3883   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
3884   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
3885 
3886   F->insert(I, IfFalseMBB);
3887   F->insert(I, TailMBB);
3888 
3889   // Transfer debug instructions associated with the selects to TailMBB.
3890   for (MachineInstr *DebugInstr : SelectDebugValues) {
3891     TailMBB->push_back(DebugInstr->removeFromParent());
3892   }
3893 
3894   // Move all instructions after the sequence to TailMBB.
3895   TailMBB->splice(TailMBB->end(), HeadMBB,
3896                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
3897   // Update machine-CFG edges by transferring all successors of the current
3898   // block to the new block which will contain the Phi nodes for the selects.
3899   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
3900   // Set the successors for HeadMBB.
3901   HeadMBB->addSuccessor(IfFalseMBB);
3902   HeadMBB->addSuccessor(TailMBB);
3903 
3904   // Insert appropriate branch.
3905   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
3906 
3907   BuildMI(HeadMBB, DL, TII.get(Opcode))
3908     .addReg(LHS)
3909     .addReg(RHS)
3910     .addMBB(TailMBB);
3911 
3912   // IfFalseMBB just falls through to TailMBB.
3913   IfFalseMBB->addSuccessor(TailMBB);
3914 
3915   // Create PHIs for all of the select pseudo-instructions.
3916   auto SelectMBBI = MI.getIterator();
3917   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
3918   auto InsertionPoint = TailMBB->begin();
3919   while (SelectMBBI != SelectEnd) {
3920     auto Next = std::next(SelectMBBI);
3921     if (isSelectPseudo(*SelectMBBI)) {
3922       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
3923       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
3924               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
3925           .addReg(SelectMBBI->getOperand(4).getReg())
3926           .addMBB(HeadMBB)
3927           .addReg(SelectMBBI->getOperand(5).getReg())
3928           .addMBB(IfFalseMBB);
3929       SelectMBBI->eraseFromParent();
3930     }
3931     SelectMBBI = Next;
3932   }
3933 
3934   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
3935   return TailMBB;
3936 }
3937 
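// Insert a PseudoVSETVLI before the vector pseudo-instruction MI so that VL and
// VTYPE match the pseudo's SEW and LMUL. As an illustrative sketch (the exact
// tail policy depends on whether the destination is tied to a source), a pseudo
// carrying a VL operand %vl with SEW=32 and LMUL=1 is preceded by:
//   dead %tmp:gpr = PseudoVSETVLI %vl, <vtype for e32, m1>
// The pseudo's now-redundant VL and SEW operands are then cleared.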
3938 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
3939                                     int VLIndex, unsigned SEWIndex,
3940                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
3941   MachineFunction &MF = *BB->getParent();
3942   DebugLoc DL = MI.getDebugLoc();
3943   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3944 
3945   unsigned SEW = MI.getOperand(SEWIndex).getImm();
3946   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3947   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
3948 
3949   MachineRegisterInfo &MRI = MF.getRegInfo();
3950 
3951   // VL and VTYPE are alive here.
3952   MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
3953 
3954   if (VLIndex >= 0) {
3955     // Set VL (rs1 != X0).
3956     Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3957     MIB.addReg(DestReg, RegState::Define | RegState::Dead)
3958         .addReg(MI.getOperand(VLIndex).getReg());
3959   } else
3960     // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
3961     MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
3962         .addReg(RISCV::X0, RegState::Kill);
3963 
3964   // Default to tail agnostic unless the destination is tied to a source. In
3965   // that case the user would have some control over the tail values. The tail
3966   // policy is also ignored on instructions that only update element 0, such as
3967   // vmv.s.x or reductions, so use agnostic there to match the common case.
3968   // FIXME: This is conservatively correct, but we might want to detect that
3969   // the input is undefined.
3970   bool TailAgnostic = true;
3971   unsigned UseOpIdx;
3972   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
3973     TailAgnostic = false;
3974     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
3975     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
3976     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
3977     if (UseMI && UseMI->isImplicitDef())
3978       TailAgnostic = true;
3979   }
3980 
3981   // For simplicity we reuse the vtype representation here.
3982   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
3983                                      /*TailAgnostic*/ TailAgnostic,
3984                                      /*MaskAgnostic*/ false));
3985 
3986   // Remove (now) redundant operands from pseudo
3987   MI.getOperand(SEWIndex).setImm(-1);
3988   if (VLIndex >= 0) {
3989     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
3990     MI.getOperand(VLIndex).setIsKill(false);
3991   }
3992 
3993   return BB;
3994 }
3995 
3996 MachineBasicBlock *
3997 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3998                                                  MachineBasicBlock *BB) const {
3999   uint64_t TSFlags = MI.getDesc().TSFlags;
4000 
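  // Vector pseudo-instructions carry their SEW (and optionally VL) as trailing
  // operands; such pseudos need a vsetvli inserted before them to configure VL
  // and VTYPE.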
4001   if (TSFlags & RISCVII::HasSEWOpMask) {
4002     unsigned NumOperands = MI.getNumExplicitOperands();
4003     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
4004     unsigned SEWIndex = NumOperands - 1;
4005     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
4006 
4007     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
4008                                                RISCVII::VLMulShift);
4009     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
4010   }
4011 
4012   switch (MI.getOpcode()) {
4013   default:
4014     llvm_unreachable("Unexpected instr type to insert");
4015   case RISCV::ReadCycleWide:
4016     assert(!Subtarget.is64Bit() &&
4017            "ReadCycleWide is only to be used on riscv32");
4018     return emitReadCycleWidePseudo(MI, BB);
4019   case RISCV::Select_GPR_Using_CC_GPR:
4020   case RISCV::Select_FPR16_Using_CC_GPR:
4021   case RISCV::Select_FPR32_Using_CC_GPR:
4022   case RISCV::Select_FPR64_Using_CC_GPR:
4023     return emitSelectPseudo(MI, BB);
4024   case RISCV::BuildPairF64Pseudo:
4025     return emitBuildPairF64Pseudo(MI, BB);
4026   case RISCV::SplitF64Pseudo:
4027     return emitSplitF64Pseudo(MI, BB);
4028   }
4029 }
4030 
4031 // Calling Convention Implementation.
4032 // The expectations for frontend ABI lowering vary from target to target.
4033 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
4034 // details, but this is a longer term goal. For now, we simply try to keep the
4035 // role of the frontend as simple and well-defined as possible. The rules can
4036 // be summarised as:
4037 // * Never split up large scalar arguments. We handle them here.
4038 // * If a hardfloat calling convention is being used, and the struct may be
4039 // passed in a pair of registers (fp+fp, int+fp), and both registers are
4040 // available, then pass as two separate arguments. If either the GPRs or FPRs
4041 // are exhausted, then pass according to the rule below.
4042 // * If a struct could never be passed in registers or directly in a stack
4043 // slot (as it is larger than 2*XLEN and the floating point rules don't
4044 // apply), then pass it using a pointer with the byval attribute.
4045 // * If a struct is less than 2*XLEN, then coerce to either a two-element
4046 // word-sized array or a 2*XLEN scalar (depending on alignment).
4047 // * The frontend can determine whether a struct is returned by reference or
4048 // not based on its size and fields. If it will be returned by reference, the
4049 // frontend must modify the prototype so a pointer with the sret annotation is
4050 // passed as the first argument. This is not necessary for large scalar
4051 // returns.
4052 // * Struct return values and varargs should be coerced to structs containing
4053 // register-size fields in the same situations they would be for fixed
4054 // arguments.
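//
// For example (illustrative only): under the rules above, a frontend targeting a
// hard-float D ABI would lower `struct { double d; int32_t i; }` as two separate
// scalar arguments (fp + int) while FPRs and GPRs remain available.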
4055 
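// The registers used for passing the first arguments under the standard calling
// conventions: a0-a7 for integers and fa0-fa7 (in their f16/f32/f64 views) for
// floating-point values.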
4056 static const MCPhysReg ArgGPRs[] = {
4057   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
4058   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
4059 };
4060 static const MCPhysReg ArgFPR16s[] = {
4061   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
4062   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
4063 };
4064 static const MCPhysReg ArgFPR32s[] = {
4065   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
4066   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
4067 };
4068 static const MCPhysReg ArgFPR64s[] = {
4069   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
4070   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
4071 };
4072 // This is an interim calling convention and it may be changed in the future.
4073 static const MCPhysReg ArgVRs[] = {
4074     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
4075     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
4076     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
4077 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
4078                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
4079                                      RISCV::V20M2, RISCV::V22M2};
4080 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
4081                                      RISCV::V20M4};
4082 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
4083 
4084 // Pass a 2*XLEN argument that has been split into two XLEN values through
4085 // registers or the stack as necessary.
4086 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
4087                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
4088                                 MVT ValVT2, MVT LocVT2,
4089                                 ISD::ArgFlagsTy ArgFlags2) {
4090   unsigned XLenInBytes = XLen / 8;
4091   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4092     // At least one half can be passed via register.
4093     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4094                                      VA1.getLocVT(), CCValAssign::Full));
4095   } else {
4096     // Both halves must be passed on the stack, with proper alignment.
4097     Align StackAlign =
4098         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4099     State.addLoc(
4100         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4101                             State.AllocateStack(XLenInBytes, StackAlign),
4102                             VA1.getLocVT(), CCValAssign::Full));
4103     State.addLoc(CCValAssign::getMem(
4104         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
4105         LocVT2, CCValAssign::Full));
4106     return false;
4107   }
4108 
4109   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4110     // The second half can also be passed via register.
4111     State.addLoc(
4112         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4113   } else {
4114     // The second half is passed via the stack, without additional alignment.
4115     State.addLoc(CCValAssign::getMem(
4116         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
4117         LocVT2, CCValAssign::Full));
4118   }
4119 
4120   return false;
4121 }
4122 
4123 // Implements the RISC-V calling convention. Returns true upon failure.
4124 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
4125                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
4126                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
4127                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
4128                      Optional<unsigned> FirstMaskArgument) {
4129   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
4130   assert(XLen == 32 || XLen == 64);
4131   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
4132 
4133   // Any return value split into more than two values can't be returned
4134   // directly.
4135   if (IsRet && ValNo > 1)
4136     return true;
4137 
4138   // UseGPRForF16_F32 is true when targeting a soft-float ABI, when passing a
4139   // variadic argument, or when no F16/F32 argument registers are available.
4140   bool UseGPRForF16_F32 = true;
4141   // UseGPRForF64 is true when targeting a soft-float or FLEN=32 ABI, when
4142   // passing a variadic argument, or when no F64 registers are available.
4143   bool UseGPRForF64 = true;
4144 
4145   switch (ABI) {
4146   default:
4147     llvm_unreachable("Unexpected ABI");
4148   case RISCVABI::ABI_ILP32:
4149   case RISCVABI::ABI_LP64:
4150     break;
4151   case RISCVABI::ABI_ILP32F:
4152   case RISCVABI::ABI_LP64F:
4153     UseGPRForF16_F32 = !IsFixed;
4154     break;
4155   case RISCVABI::ABI_ILP32D:
4156   case RISCVABI::ABI_LP64D:
4157     UseGPRForF16_F32 = !IsFixed;
4158     UseGPRForF64 = !IsFixed;
4159     break;
4160   }
4161 
4162   // FPR16, FPR32, and FPR64 alias each other.
4163   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
4164     UseGPRForF16_F32 = true;
4165     UseGPRForF64 = true;
4166   }
4167 
4168   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
4169   // similar local variables rather than directly checking against the target
4170   // ABI.
4171 
4172   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
4173     LocVT = XLenVT;
4174     LocInfo = CCValAssign::BCvt;
4175   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
4176     LocVT = MVT::i64;
4177     LocInfo = CCValAssign::BCvt;
4178   }
4179 
4180   // If this is a variadic argument, the RISC-V calling convention requires
4181   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
4182   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
4183   // be used regardless of whether the original argument was split during
4184   // legalisation or not. The argument will not be passed by registers if the
4185   // original type is larger than 2*XLEN, so the register alignment rule does
4186   // not apply.
4187   unsigned TwoXLenInBytes = (2 * XLen) / 8;
4188   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
4189       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
4190     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4191     // Skip 'odd' register if necessary.
4192     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
4193       State.AllocateReg(ArgGPRs);
4194   }
4195 
4196   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4197   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4198       State.getPendingArgFlags();
4199 
4200   assert(PendingLocs.size() == PendingArgFlags.size() &&
4201          "PendingLocs and PendingArgFlags out of sync");
4202 
4203   // Handle passing f64 on RV32D with a soft float ABI or when floating point
4204   // registers are exhausted.
4205   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
4206     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
4207            "Can't lower f64 if it is split");
4208     // Depending on available argument GPRs, f64 may be passed in a pair of
4209     // GPRs, split between a GPR and the stack, or passed completely on the
4210     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
4211     // cases.
4212     Register Reg = State.AllocateReg(ArgGPRs);
4213     LocVT = MVT::i32;
4214     if (!Reg) {
4215       unsigned StackOffset = State.AllocateStack(8, Align(8));
4216       State.addLoc(
4217           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4218       return false;
4219     }
4220     if (!State.AllocateReg(ArgGPRs))
4221       State.AllocateStack(4, Align(4));
4222     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4223     return false;
4224   }
4225 
4226   // Split arguments might be passed indirectly, so keep track of the pending
4227   // values.
4228   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
4229     LocVT = XLenVT;
4230     LocInfo = CCValAssign::Indirect;
4231     PendingLocs.push_back(
4232         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4233     PendingArgFlags.push_back(ArgFlags);
4234     if (!ArgFlags.isSplitEnd()) {
4235       return false;
4236     }
4237   }
4238 
4239   // If the split argument only had two elements, it should be passed directly
4240   // in registers or on the stack.
4241   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
4242     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4243     // Apply the normal calling convention rules to the first half of the
4244     // split argument.
4245     CCValAssign VA = PendingLocs[0];
4246     ISD::ArgFlagsTy AF = PendingArgFlags[0];
4247     PendingLocs.clear();
4248     PendingArgFlags.clear();
4249     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
4250                                ArgFlags);
4251   }
4252 
4253   // Allocate to a register if possible, or else a stack slot.
4254   Register Reg;
4255   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
4256     Reg = State.AllocateReg(ArgFPR16s);
4257   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
4258     Reg = State.AllocateReg(ArgFPR32s);
4259   else if (ValVT == MVT::f64 && !UseGPRForF64)
4260     Reg = State.AllocateReg(ArgFPR64s);
4261   else if (ValVT.isScalableVector()) {
4262     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
4263     if (RC == &RISCV::VRRegClass) {
4264       // Assign the first mask argument to V0.
4265       // This is an interim calling convention and it may be changed in the
4266       // future.
4267       if (FirstMaskArgument.hasValue() &&
4268           ValNo == FirstMaskArgument.getValue()) {
4269         Reg = State.AllocateReg(RISCV::V0);
4270       } else {
4271         Reg = State.AllocateReg(ArgVRs);
4272       }
4273     } else if (RC == &RISCV::VRM2RegClass) {
4274       Reg = State.AllocateReg(ArgVRM2s);
4275     } else if (RC == &RISCV::VRM4RegClass) {
4276       Reg = State.AllocateReg(ArgVRM4s);
4277     } else if (RC == &RISCV::VRM8RegClass) {
4278       Reg = State.AllocateReg(ArgVRM8s);
4279     } else {
4280       llvm_unreachable("Unhandled class register for ValueType");
4281     }
4282     if (!Reg) {
4283       LocInfo = CCValAssign::Indirect;
4284       // Try using a GPR to pass the address
4285       Reg = State.AllocateReg(ArgGPRs);
4286       LocVT = XLenVT;
4287     }
4288   } else
4289     Reg = State.AllocateReg(ArgGPRs);
4290   unsigned StackOffset =
4291       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
4292 
4293   // If we reach this point and PendingLocs is non-empty, we must be at the
4294   // end of a split argument that must be passed indirectly.
4295   if (!PendingLocs.empty()) {
4296     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4297     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4298 
4299     for (auto &It : PendingLocs) {
4300       if (Reg)
4301         It.convertToReg(Reg);
4302       else
4303         It.convertToMem(StackOffset);
4304       State.addLoc(It);
4305     }
4306     PendingLocs.clear();
4307     PendingArgFlags.clear();
4308     return false;
4309   }
4310 
4311   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
4312           (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
4313          "Expected an XLenVT or scalable vector types at this stage");
4314 
4315   if (Reg) {
4316     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4317     return false;
4318   }
4319 
4320   // When a floating-point value is passed on the stack, no bit-conversion is
4321   // needed.
4322   if (ValVT.isFloatingPoint()) {
4323     LocVT = ValVT;
4324     LocInfo = CCValAssign::Full;
4325   }
4326   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4327   return false;
4328 }
4329 
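// Return the index of the first scalable vector mask argument (element type
// i1), if any. CC_RISCV above pre-assigns such an argument to V0.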
4330 template <typename ArgTy>
4331 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
4332   for (const auto &ArgIdx : enumerate(Args)) {
4333     MVT ArgVT = ArgIdx.value().VT;
4334     if (ArgVT.isScalableVector() &&
4335         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
4336       return ArgIdx.index();
4337   }
4338   return None;
4339 }
4340 
4341 void RISCVTargetLowering::analyzeInputArgs(
4342     MachineFunction &MF, CCState &CCInfo,
4343     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
4344   unsigned NumArgs = Ins.size();
4345   FunctionType *FType = MF.getFunction().getFunctionType();
4346 
4347   Optional<unsigned> FirstMaskArgument;
4348   if (Subtarget.hasStdExtV())
4349     FirstMaskArgument = preAssignMask(Ins);
4350 
4351   for (unsigned i = 0; i != NumArgs; ++i) {
4352     MVT ArgVT = Ins[i].VT;
4353     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
4354 
4355     Type *ArgTy = nullptr;
4356     if (IsRet)
4357       ArgTy = FType->getReturnType();
4358     else if (Ins[i].isOrigArg())
4359       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4360 
4361     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4362     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
4363                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
4364                  FirstMaskArgument)) {
4365       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
4366                         << EVT(ArgVT).getEVTString() << '\n');
4367       llvm_unreachable(nullptr);
4368     }
4369   }
4370 }
4371 
4372 void RISCVTargetLowering::analyzeOutputArgs(
4373     MachineFunction &MF, CCState &CCInfo,
4374     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4375     CallLoweringInfo *CLI) const {
4376   unsigned NumArgs = Outs.size();
4377 
4378   Optional<unsigned> FirstMaskArgument;
4379   if (Subtarget.hasStdExtV())
4380     FirstMaskArgument = preAssignMask(Outs);
4381 
4382   for (unsigned i = 0; i != NumArgs; i++) {
4383     MVT ArgVT = Outs[i].VT;
4384     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4385     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4386 
4387     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4388     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
4389                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
4390                  FirstMaskArgument)) {
4391       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
4392                         << EVT(ArgVT).getEVTString() << "\n");
4393       llvm_unreachable(nullptr);
4394     }
4395   }
4396 }
4397 
4398 // Convert Val from its LocVT to the ValVT required by VA. Should not be
4399 // called for CCValAssign::Indirect values.
4400 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4401                                    const CCValAssign &VA, const SDLoc &DL) {
4402   switch (VA.getLocInfo()) {
4403   default:
4404     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4405   case CCValAssign::Full:
4406     break;
4407   case CCValAssign::BCvt:
4408     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
4409       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
4410     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4411       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
4412     else
4413       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4414     break;
4415   }
4416   return Val;
4417 }
4418 
4419 // The caller is responsible for loading the full value if the argument is
4420 // passed with CCValAssign::Indirect.
4421 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4422                                 const CCValAssign &VA, const SDLoc &DL,
4423                                 const RISCVTargetLowering &TLI) {
4424   MachineFunction &MF = DAG.getMachineFunction();
4425   MachineRegisterInfo &RegInfo = MF.getRegInfo();
4426   EVT LocVT = VA.getLocVT();
4427   SDValue Val;
4428   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4429   Register VReg = RegInfo.createVirtualRegister(RC);
4430   RegInfo.addLiveIn(VA.getLocReg(), VReg);
4431   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4432 
4433   if (VA.getLocInfo() == CCValAssign::Indirect)
4434     return Val;
4435 
4436   return convertLocVTToValVT(DAG, Val, VA, DL);
4437 }
4438 
4439 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4440                                    const CCValAssign &VA, const SDLoc &DL) {
4441   EVT LocVT = VA.getLocVT();
4442 
4443   switch (VA.getLocInfo()) {
4444   default:
4445     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4446   case CCValAssign::Full:
4447     break;
4448   case CCValAssign::BCvt:
4449     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
4450       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
4451     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4452       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
4453     else
4454       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
4455     break;
4456   }
4457   return Val;
4458 }
4459 
4460 // The caller is responsible for loading the full value if the argument is
4461 // passed with CCValAssign::Indirect.
4462 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4463                                 const CCValAssign &VA, const SDLoc &DL) {
4464   MachineFunction &MF = DAG.getMachineFunction();
4465   MachineFrameInfo &MFI = MF.getFrameInfo();
4466   EVT LocVT = VA.getLocVT();
4467   EVT ValVT = VA.getValVT();
4468   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
4469   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
4470                                  VA.getLocMemOffset(), /*Immutable=*/true);
4471   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4472   SDValue Val;
4473 
4474   ISD::LoadExtType ExtType;
4475   switch (VA.getLocInfo()) {
4476   default:
4477     llvm_unreachable("Unexpected CCValAssign::LocInfo");
4478   case CCValAssign::Full:
4479   case CCValAssign::Indirect:
4480   case CCValAssign::BCvt:
4481     ExtType = ISD::NON_EXTLOAD;
4482     break;
4483   }
4484   Val = DAG.getExtLoad(
4485       ExtType, DL, LocVT, Chain, FIN,
4486       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
4487   return Val;
4488 }
4489 
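// Unpack an f64 argument passed under the RV32D soft-float rules: it may arrive
// in a pair of GPRs, split between a GPR and the stack, or entirely on the
// stack.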
4490 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
4491                                        const CCValAssign &VA, const SDLoc &DL) {
4492   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
4493          "Unexpected VA");
4494   MachineFunction &MF = DAG.getMachineFunction();
4495   MachineFrameInfo &MFI = MF.getFrameInfo();
4496   MachineRegisterInfo &RegInfo = MF.getRegInfo();
4497 
4498   if (VA.isMemLoc()) {
4499     // f64 is passed on the stack.
4500     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
4501     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
4502     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
4503                        MachinePointerInfo::getFixedStack(MF, FI));
4504   }
4505 
4506   assert(VA.isRegLoc() && "Expected register VA assignment");
4507 
4508   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
4509   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
4510   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
4511   SDValue Hi;
4512   if (VA.getLocReg() == RISCV::X17) {
4513     // Second half of f64 is passed on the stack.
4514     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
4515     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
4516     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
4517                      MachinePointerInfo::getFixedStack(MF, FI));
4518   } else {
4519     // Second half of f64 is passed in another GPR.
4520     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
4521     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
4522     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
4523   }
4524   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
4525 }
4526 
4527 // FastCC gives less than a 1% performance improvement on some particular
4528 // benchmarks, but in theory it may benefit other cases.
4529 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
4530                             CCValAssign::LocInfo LocInfo,
4531                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
4532 
4533   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
4534     // X5 and X6 might be used for save-restore libcall.
4535     static const MCPhysReg GPRList[] = {
4536         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
4537         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
4538         RISCV::X29, RISCV::X30, RISCV::X31};
4539     if (unsigned Reg = State.AllocateReg(GPRList)) {
4540       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4541       return false;
4542     }
4543   }
4544 
4545   if (LocVT == MVT::f16) {
4546     static const MCPhysReg FPR16List[] = {
4547         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
4548         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
4549         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
4550         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
4551     if (unsigned Reg = State.AllocateReg(FPR16List)) {
4552       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4553       return false;
4554     }
4555   }
4556 
4557   if (LocVT == MVT::f32) {
4558     static const MCPhysReg FPR32List[] = {
4559         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
4560         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
4561         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
4562         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
4563     if (unsigned Reg = State.AllocateReg(FPR32List)) {
4564       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4565       return false;
4566     }
4567   }
4568 
4569   if (LocVT == MVT::f64) {
4570     static const MCPhysReg FPR64List[] = {
4571         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
4572         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
4573         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
4574         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
4575     if (unsigned Reg = State.AllocateReg(FPR64List)) {
4576       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4577       return false;
4578     }
4579   }
4580 
4581   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
4582     unsigned Offset4 = State.AllocateStack(4, Align(4));
4583     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
4584     return false;
4585   }
4586 
4587   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
4588     unsigned Offset5 = State.AllocateStack(8, Align(8));
4589     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
4590     return false;
4591   }
4592 
4593   return true; // CC didn't match.
4594 }
4595 
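// Calling convention used for GHC (Glasgow Haskell Compiler) code, which maps
// STG virtual registers onto fixed callee-saved registers and never passes
// arguments on the stack.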
4596 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
4597                          CCValAssign::LocInfo LocInfo,
4598                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
4599 
4600   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
4601     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
4602     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
4603     static const MCPhysReg GPRList[] = {
4604         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
4605         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
4606     if (unsigned Reg = State.AllocateReg(GPRList)) {
4607       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4608       return false;
4609     }
4610   }
4611 
4612   if (LocVT == MVT::f32) {
4613     // Pass in STG registers: F1, ..., F6
4614     //                        fs0 ... fs5
4615     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
4616                                           RISCV::F18_F, RISCV::F19_F,
4617                                           RISCV::F20_F, RISCV::F21_F};
4618     if (unsigned Reg = State.AllocateReg(FPR32List)) {
4619       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4620       return false;
4621     }
4622   }
4623 
4624   if (LocVT == MVT::f64) {
4625     // Pass in STG registers: D1, ..., D6
4626     //                        fs6 ... fs11
4627     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
4628                                           RISCV::F24_D, RISCV::F25_D,
4629                                           RISCV::F26_D, RISCV::F27_D};
4630     if (unsigned Reg = State.AllocateReg(FPR64List)) {
4631       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4632       return false;
4633     }
4634   }
4635 
4636   report_fatal_error("No registers left in GHC calling convention");
4637   return true;
4638 }
4639 
4640 // Transform physical registers into virtual registers.
4641 SDValue RISCVTargetLowering::LowerFormalArguments(
4642     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4643     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4644     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4645 
4646   MachineFunction &MF = DAG.getMachineFunction();
4647 
4648   switch (CallConv) {
4649   default:
4650     report_fatal_error("Unsupported calling convention");
4651   case CallingConv::C:
4652   case CallingConv::Fast:
4653     break;
4654   case CallingConv::GHC:
4655     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
4656         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
4657       report_fatal_error(
4658         "GHC calling convention requires the F and D instruction set extensions");
4659   }
4660 
4661   const Function &Func = MF.getFunction();
4662   if (Func.hasFnAttribute("interrupt")) {
4663     if (!Func.arg_empty())
4664       report_fatal_error(
4665         "Functions with the interrupt attribute cannot have arguments!");
4666 
4667     StringRef Kind =
4668       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4669 
4670     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
4671       report_fatal_error(
4672         "Function interrupt attribute argument not supported!");
4673   }
4674 
4675   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4676   MVT XLenVT = Subtarget.getXLenVT();
4677   unsigned XLenInBytes = Subtarget.getXLen() / 8;
4678   // Used with varargs to accumulate store chains.
4679   std::vector<SDValue> OutChains;
4680 
4681   // Assign locations to all of the incoming arguments.
4682   SmallVector<CCValAssign, 16> ArgLocs;
4683   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4684 
4685   if (CallConv == CallingConv::Fast)
4686     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
4687   else if (CallConv == CallingConv::GHC)
4688     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
4689   else
4690     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
4691 
4692   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4693     CCValAssign &VA = ArgLocs[i];
4694     SDValue ArgValue;
4695     // Passing f64 on RV32D with a soft float ABI must be handled as a special
4696     // case.
4697     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
4698       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
4699     else if (VA.isRegLoc())
4700       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
4701     else
4702       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
4703 
4704     if (VA.getLocInfo() == CCValAssign::Indirect) {
4705       // If the original argument was split and passed by reference (e.g. i128
4706       // on RV32), we need to load all parts of it here (using the same
4707       // address).
4708       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
4709                                    MachinePointerInfo()));
4710       unsigned ArgIndex = Ins[i].OrigArgIndex;
4711       assert(Ins[i].PartOffset == 0);
4712       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
4713         CCValAssign &PartVA = ArgLocs[i + 1];
4714         unsigned PartOffset = Ins[i + 1].PartOffset;
4715         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
4716                                       DAG.getIntPtrConstant(PartOffset, DL));
4717         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
4718                                      MachinePointerInfo()));
4719         ++i;
4720       }
4721       continue;
4722     }
4723     InVals.push_back(ArgValue);
4724   }
4725 
4726   if (IsVarArg) {
4727     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
4728     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
4729     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
4730     MachineFrameInfo &MFI = MF.getFrameInfo();
4731     MachineRegisterInfo &RegInfo = MF.getRegInfo();
4732     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
4733 
4734     // Offset of the first variable argument from stack pointer, and size of
4735     // the vararg save area. For now, the varargs save area is either zero or
4736     // large enough to hold a0-a7.
4737     int VaArgOffset, VarArgsSaveSize;
4738 
4739     // If all registers are allocated, then all varargs must be passed on the
4740     // stack and we don't need to save any argregs.
4741     if (ArgRegs.size() == Idx) {
4742       VaArgOffset = CCInfo.getNextStackOffset();
4743       VarArgsSaveSize = 0;
4744     } else {
4745       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
4746       VaArgOffset = -VarArgsSaveSize;
4747     }
4748 
4749     // Record the frame index of the first variable argument,
4750     // which is needed to implement VASTART.
4751     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4752     RVFI->setVarArgsFrameIndex(FI);
4753 
4754     // If saving an odd number of registers then create an extra stack slot to
4755     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
4756     // offsets to even-numbered registers remain 2*XLEN-aligned.
4757     if (Idx % 2) {
4758       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
4759       VarArgsSaveSize += XLenInBytes;
4760     }
4761 
4762     // Copy the integer registers that may have been used for passing varargs
4763     // to the vararg save area.
4764     for (unsigned I = Idx; I < ArgRegs.size();
4765          ++I, VaArgOffset += XLenInBytes) {
4766       const Register Reg = RegInfo.createVirtualRegister(RC);
4767       RegInfo.addLiveIn(ArgRegs[I], Reg);
4768       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
4769       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4770       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4771       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4772                                    MachinePointerInfo::getFixedStack(MF, FI));
4773       cast<StoreSDNode>(Store.getNode())
4774           ->getMemOperand()
4775           ->setValue((Value *)nullptr);
4776       OutChains.push_back(Store);
4777     }
4778     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
4779   }
4780 
4781   // All stores are grouped in one node to allow matching between the sizes of
4782   // Ins and InVals. This only happens for vararg functions.
4783   if (!OutChains.empty()) {
4784     OutChains.push_back(Chain);
4785     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4786   }
4787 
4788   return Chain;
4789 }
4790 
4791 /// isEligibleForTailCallOptimization - Check whether the call is eligible
4792 /// for tail call optimization.
4793 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
4794 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
4795     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4796     const SmallVector<CCValAssign, 16> &ArgLocs) const {
4797 
4798   auto &Callee = CLI.Callee;
4799   auto CalleeCC = CLI.CallConv;
4800   auto &Outs = CLI.Outs;
4801   auto &Caller = MF.getFunction();
4802   auto CallerCC = Caller.getCallingConv();
4803 
4804   // Exception-handling functions need a special set of instructions to
4805   // indicate a return to the hardware. Tail-calling another function would
4806   // probably break this.
4807   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
4808   // should be expanded as new function attributes are introduced.
4809   if (Caller.hasFnAttribute("interrupt"))
4810     return false;
4811 
4812   // Do not tail call opt if the stack is used to pass parameters.
4813   if (CCInfo.getNextStackOffset() != 0)
4814     return false;
4815 
4816   // Do not tail call opt if any parameters need to be passed indirectly.
4817   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
4818   // passed indirectly. So the address of the value will be passed in a
4819   // register, or if not available, then the address is put on the stack. In
4820   // order to pass indirectly, space on the stack often needs to be allocated
4821   // in order to store the value. In this case the CCInfo.getNextStackOffset()
4822   // != 0 check is not enough, and we also need to check whether any of the
4823   // CCValAssign ArgLocs use CCValAssign::Indirect.
4824   for (auto &VA : ArgLocs)
4825     if (VA.getLocInfo() == CCValAssign::Indirect)
4826       return false;
4827 
4828   // Do not tail call opt if either caller or callee uses struct return
4829   // semantics.
4830   auto IsCallerStructRet = Caller.hasStructRetAttr();
4831   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4832   if (IsCallerStructRet || IsCalleeStructRet)
4833     return false;
4834 
4835   // Externally-defined functions with weak linkage should not be
4836   // tail-called. The behaviour of branch instructions in this situation (as
4837   // used for tail calls) is implementation-defined, so we cannot rely on the
4838   // linker replacing the tail call with a return.
4839   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4840     const GlobalValue *GV = G->getGlobal();
4841     if (GV->hasExternalWeakLinkage())
4842       return false;
4843   }
4844 
4845   // The callee has to preserve all registers the caller needs to preserve.
4846   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4847   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4848   if (CalleeCC != CallerCC) {
4849     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4850     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4851       return false;
4852   }
4853 
4854   // Byval parameters hand the function a pointer directly into the stack area
4855   // we want to reuse during a tail call. Working around this *is* possible
4856   // but less efficient and uglier in LowerCall.
4857   for (auto &Arg : Outs)
4858     if (Arg.Flags.isByVal())
4859       return false;
4860 
4861   return true;
4862 }
4863 
4864 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4865 // and output parameter nodes.
4866 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
4867                                        SmallVectorImpl<SDValue> &InVals) const {
4868   SelectionDAG &DAG = CLI.DAG;
4869   SDLoc &DL = CLI.DL;
4870   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4871   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4872   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4873   SDValue Chain = CLI.Chain;
4874   SDValue Callee = CLI.Callee;
4875   bool &IsTailCall = CLI.IsTailCall;
4876   CallingConv::ID CallConv = CLI.CallConv;
4877   bool IsVarArg = CLI.IsVarArg;
4878   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4879   MVT XLenVT = Subtarget.getXLenVT();
4880 
4881   MachineFunction &MF = DAG.getMachineFunction();
4882 
4883   // Analyze the operands of the call, assigning locations to each operand.
4884   SmallVector<CCValAssign, 16> ArgLocs;
4885   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4886 
4887   if (CallConv == CallingConv::Fast)
4888     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
4889   else if (CallConv == CallingConv::GHC)
4890     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
4891   else
4892     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
4893 
4894   // Check if it's really possible to do a tail call.
4895   if (IsTailCall)
4896     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4897 
4898   if (IsTailCall)
4899     ++NumTailCalls;
4900   else if (CLI.CB && CLI.CB->isMustTailCall())
4901     report_fatal_error("failed to perform tail call elimination on a call "
4902                        "site marked musttail");
4903 
4904   // Get a count of how many bytes are to be pushed on the stack.
4905   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
4906 
4907   // Create local copies for byval args
4908   SmallVector<SDValue, 8> ByValArgs;
4909   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4910     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4911     if (!Flags.isByVal())
4912       continue;
4913 
4914     SDValue Arg = OutVals[i];
4915     unsigned Size = Flags.getByValSize();
4916     Align Alignment = Flags.getNonZeroByValAlign();
4917 
4918     int FI =
4919         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4920     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4921     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
4922 
4923     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4924                           /*IsVolatile=*/false,
4925                           /*AlwaysInline=*/false, IsTailCall,
4926                           MachinePointerInfo(), MachinePointerInfo());
4927     ByValArgs.push_back(FIPtr);
4928   }
4929 
4930   if (!IsTailCall)
4931     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4932 
4933   // Copy argument values to their designated locations.
4934   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4935   SmallVector<SDValue, 8> MemOpChains;
4936   SDValue StackPtr;
4937   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4938     CCValAssign &VA = ArgLocs[i];
4939     SDValue ArgValue = OutVals[i];
4940     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4941 
4942     // Handle passing f64 on RV32D with a soft float ABI as a special case.
4943     bool IsF64OnRV32DSoftABI =
4944         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
4945     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
4946       SDValue SplitF64 = DAG.getNode(
4947           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
4948       SDValue Lo = SplitF64.getValue(0);
4949       SDValue Hi = SplitF64.getValue(1);
4950 
4951       Register RegLo = VA.getLocReg();
4952       RegsToPass.push_back(std::make_pair(RegLo, Lo));
4953 
4954       if (RegLo == RISCV::X17) {
4955         // Second half of f64 is passed on the stack.
4956         // Work out the address of the stack slot.
4957         if (!StackPtr.getNode())
4958           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
4959         // Emit the store.
4960         MemOpChains.push_back(
4961             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
4962       } else {
4963         // Second half of f64 is passed in another GPR.
4964         assert(RegLo < RISCV::X31 && "Invalid register pair");
4965         Register RegHigh = RegLo + 1;
4966         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
4967       }
4968       continue;
4969     }
4970 
4971     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
4972     // as any other MemLoc.
4973 
4974     // Promote the value if needed.
4975     // For now, only handle fully promoted and indirect arguments.
4976     if (VA.getLocInfo() == CCValAssign::Indirect) {
4977       // Store the argument in a stack slot and pass its address.
4978       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
4979       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4980       MemOpChains.push_back(
4981           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4982                        MachinePointerInfo::getFixedStack(MF, FI)));
4983       // If the original argument was split (e.g. i128), we need
4984       // to store all parts of it here (and pass just one address).
4985       unsigned ArgIndex = Outs[i].OrigArgIndex;
4986       assert(Outs[i].PartOffset == 0);
4987       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4988         SDValue PartValue = OutVals[i + 1];
4989         unsigned PartOffset = Outs[i + 1].PartOffset;
4990         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
4991                                       DAG.getIntPtrConstant(PartOffset, DL));
4992         MemOpChains.push_back(
4993             DAG.getStore(Chain, DL, PartValue, Address,
4994                          MachinePointerInfo::getFixedStack(MF, FI)));
4995         ++i;
4996       }
4997       ArgValue = SpillSlot;
4998     } else {
4999       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5000     }
5001 
5002     // Use local copy if it is a byval arg.
5003     if (Flags.isByVal())
5004       ArgValue = ByValArgs[j++];
5005 
5006     if (VA.isRegLoc()) {
5007       // Queue up the argument copies and emit them at the end.
5008       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5009     } else {
5010       assert(VA.isMemLoc() && "Argument not register or memory");
5011       assert(!IsTailCall && "Tail call not allowed if stack is used "
5012                             "for passing parameters");
5013 
5014       // Work out the address of the stack slot.
5015       if (!StackPtr.getNode())
5016         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
5017       SDValue Address =
5018           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5019                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5020 
5021       // Emit the store.
5022       MemOpChains.push_back(
5023           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5024     }
5025   }
5026 
5027   // Join the stores, which are independent of one another.
5028   if (!MemOpChains.empty())
5029     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5030 
5031   SDValue Glue;
5032 
5033   // Build a sequence of copy-to-reg nodes, chained and glued together.
5034   for (auto &Reg : RegsToPass) {
5035     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5036     Glue = Chain.getValue(1);
5037   }
5038 
5039   // Validate that none of the argument registers have been marked as
5040   // reserved; if so, report an error. Do the same for the return address if
5041   // this is not a tail call.
5042   validateCCReservedRegs(RegsToPass, MF);
5043   if (!IsTailCall &&
5044       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
5045     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5046         MF.getFunction(),
5047         "Return address register required, but has been reserved."});
5048 
5049   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5050   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5051   // split it, and so that the direct call can be matched by PseudoCALL.
5052   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5053     const GlobalValue *GV = S->getGlobal();
5054 
5055     unsigned OpFlags = RISCVII::MO_CALL;
5056     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
5057       OpFlags = RISCVII::MO_PLT;
5058 
5059     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
5060   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5061     unsigned OpFlags = RISCVII::MO_CALL;
5062 
5063     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
5064                                                  nullptr))
5065       OpFlags = RISCVII::MO_PLT;
5066 
5067     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5068   }
5069 
5070   // The first call operand is the chain and the second is the target address.
5071   SmallVector<SDValue, 8> Ops;
5072   Ops.push_back(Chain);
5073   Ops.push_back(Callee);
5074 
5075   // Add argument registers to the end of the list so that they are
5076   // known live into the call.
5077   for (auto &Reg : RegsToPass)
5078     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5079 
5080   if (!IsTailCall) {
5081     // Add a register mask operand representing the call-preserved registers.
5082     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5083     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5084     assert(Mask && "Missing call preserved mask for calling convention");
5085     Ops.push_back(DAG.getRegisterMask(Mask));
5086   }
5087 
5088   // Glue the call to the argument copies, if any.
5089   if (Glue.getNode())
5090     Ops.push_back(Glue);
5091 
5092   // Emit the call.
5093   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5094 
5095   if (IsTailCall) {
5096     MF.getFrameInfo().setHasTailCall();
5097     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
5098   }
5099 
5100   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
5101   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5102   Glue = Chain.getValue(1);
5103 
5104   // Mark the end of the call, which is glued to the call itself.
5105   Chain = DAG.getCALLSEQ_END(Chain,
5106                              DAG.getConstant(NumBytes, DL, PtrVT, true),
5107                              DAG.getConstant(0, DL, PtrVT, true),
5108                              Glue, DL);
5109   Glue = Chain.getValue(1);
5110 
5111   // Assign locations to each value returned by this call.
5112   SmallVector<CCValAssign, 16> RVLocs;
5113   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5114   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
5115 
5116   // Copy all of the result registers out of their specified physreg.
5117   for (auto &VA : RVLocs) {
5118     // Copy the value out
5119     SDValue RetValue =
5120         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5121     // Glue the RetValue to the end of the call sequence
5122     Chain = RetValue.getValue(1);
5123     Glue = RetValue.getValue(2);
5124 
5125     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
5126       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
5127       SDValue RetValue2 =
5128           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
5129       Chain = RetValue2.getValue(1);
5130       Glue = RetValue2.getValue(2);
5131       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
5132                              RetValue2);
5133     }
5134 
5135     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5136 
5137     InVals.push_back(RetValue);
5138   }
5139 
5140   return Chain;
5141 }
5142 
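// Return true if every return value described by Outs can be assigned a
// location by CC_RISCV; returning false causes the generic code to demote the
// return to an sret-style out-parameter instead.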
5143 bool RISCVTargetLowering::CanLowerReturn(
5144     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5145     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5146   SmallVector<CCValAssign, 16> RVLocs;
5147   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5148 
5149   Optional<unsigned> FirstMaskArgument;
5150   if (Subtarget.hasStdExtV())
5151     FirstMaskArgument = preAssignMask(Outs);
5152 
5153   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5154     MVT VT = Outs[i].VT;
5155     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5156     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5157     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
5158                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
5159                  *this, FirstMaskArgument))
5160       return false;
5161   }
5162   return true;
5163 }
5164 
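// Lower an IR return: assign each return value a location with CC_RISCV,
// copy the values into their return registers (splitting f64 into a GPR pair
// for the RV32 soft-float ABIs), and emit RET_FLAG, or the URET/SRET/MRET
// variants for functions carrying the "interrupt" attribute.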
5165 SDValue
5166 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
5167                                  bool IsVarArg,
5168                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
5169                                  const SmallVectorImpl<SDValue> &OutVals,
5170                                  const SDLoc &DL, SelectionDAG &DAG) const {
5171   const MachineFunction &MF = DAG.getMachineFunction();
5172   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
5173 
5174   // Stores the assignment of the return value to a location.
5175   SmallVector<CCValAssign, 16> RVLocs;
5176 
5177   // Info about the registers and stack slot.
5178   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5179                  *DAG.getContext());
5180 
5181   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5182                     nullptr);
5183 
5184   if (CallConv == CallingConv::GHC && !RVLocs.empty())
5185     report_fatal_error("GHC functions return void only");
5186 
5187   SDValue Glue;
5188   SmallVector<SDValue, 4> RetOps(1, Chain);
5189 
5190   // Copy the result values into the output registers.
5191   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5192     SDValue Val = OutVals[i];
5193     CCValAssign &VA = RVLocs[i];
5194     assert(VA.isRegLoc() && "Can only return in registers!");
5195 
5196     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
5197       // Handle returning f64 on RV32D with a soft float ABI.
5198       assert(VA.isRegLoc() && "Expected return via registers");
5199       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
5200                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
5201       SDValue Lo = SplitF64.getValue(0);
5202       SDValue Hi = SplitF64.getValue(1);
5203       Register RegLo = VA.getLocReg();
5204       assert(RegLo < RISCV::X31 && "Invalid register pair");
5205       Register RegHi = RegLo + 1;
5206 
5207       if (STI.isRegisterReservedByUser(RegLo) ||
5208           STI.isRegisterReservedByUser(RegHi))
5209         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5210             MF.getFunction(),
5211             "Return value register required, but has been reserved."});
5212 
5213       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
5214       Glue = Chain.getValue(1);
5215       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
5216       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
5217       Glue = Chain.getValue(1);
5218       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
5219     } else {
5220       // Handle a 'normal' return.
5221       Val = convertValVTToLocVT(DAG, Val, VA, DL);
5222       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5223 
5224       if (STI.isRegisterReservedByUser(VA.getLocReg()))
5225         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5226             MF.getFunction(),
5227             "Return value register required, but has been reserved."});
5228 
5229       // Guarantee that all emitted copies are stuck together.
5230       Glue = Chain.getValue(1);
5231       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5232     }
5233   }
5234 
5235   RetOps[0] = Chain; // Update chain.
5236 
5237   // Add the glue node if we have it.
5238   if (Glue.getNode()) {
5239     RetOps.push_back(Glue);
5240   }
5241 
5242   // Interrupt service routines use different return instructions.
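  // For example (illustrative), a handler declared with
  // __attribute__((interrupt("machine"))) returns via MRET_FLAG.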
5243   const Function &Func = DAG.getMachineFunction().getFunction();
5244   if (Func.hasFnAttribute("interrupt")) {
5245     if (!Func.getReturnType()->isVoidTy())
5246       report_fatal_error(
5247           "Functions with the interrupt attribute must have void return type!");
5248 
5249     MachineFunction &MF = DAG.getMachineFunction();
5250     StringRef Kind =
5251       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
5252 
5253     unsigned RetOpc;
5254     if (Kind == "user")
5255       RetOpc = RISCVISD::URET_FLAG;
5256     else if (Kind == "supervisor")
5257       RetOpc = RISCVISD::SRET_FLAG;
5258     else
5259       RetOpc = RISCVISD::MRET_FLAG;
5260 
5261     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
5262   }
5263 
5264   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
5265 }
5266 
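// Diagnose (without aborting) any argument registers chosen by the calling
// convention that the user has asked to reserve, e.g. via -ffixed-<reg>.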
5267 void RISCVTargetLowering::validateCCReservedRegs(
5268     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
5269     MachineFunction &MF) const {
5270   const Function &F = MF.getFunction();
5271   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
5272 
5273   if (llvm::any_of(Regs, [&STI](auto Reg) {
5274         return STI.isRegisterReservedByUser(Reg.first);
5275       }))
5276     F.getContext().diagnose(DiagnosticInfoUnsupported{
5277         F, "Argument register required, but has been reserved."});
5278 }
5279 
5280 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5281   return CI->isTailCall();
5282 }
5283 
5284 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
5285 #define NODE_NAME_CASE(NODE)                                                   \
5286   case RISCVISD::NODE:                                                         \
5287     return "RISCVISD::" #NODE;
5288   // clang-format off
5289   switch ((RISCVISD::NodeType)Opcode) {
5290   case RISCVISD::FIRST_NUMBER:
5291     break;
5292   NODE_NAME_CASE(RET_FLAG)
5293   NODE_NAME_CASE(URET_FLAG)
5294   NODE_NAME_CASE(SRET_FLAG)
5295   NODE_NAME_CASE(MRET_FLAG)
5296   NODE_NAME_CASE(CALL)
5297   NODE_NAME_CASE(SELECT_CC)
5298   NODE_NAME_CASE(BuildPairF64)
5299   NODE_NAME_CASE(SplitF64)
5300   NODE_NAME_CASE(TAIL)
5301   NODE_NAME_CASE(SLLW)
5302   NODE_NAME_CASE(SRAW)
5303   NODE_NAME_CASE(SRLW)
5304   NODE_NAME_CASE(DIVW)
5305   NODE_NAME_CASE(DIVUW)
5306   NODE_NAME_CASE(REMUW)
5307   NODE_NAME_CASE(ROLW)
5308   NODE_NAME_CASE(RORW)
5309   NODE_NAME_CASE(FSLW)
5310   NODE_NAME_CASE(FSRW)
5311   NODE_NAME_CASE(FSL)
5312   NODE_NAME_CASE(FSR)
5313   NODE_NAME_CASE(FMV_H_X)
5314   NODE_NAME_CASE(FMV_X_ANYEXTH)
5315   NODE_NAME_CASE(FMV_W_X_RV64)
5316   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
5317   NODE_NAME_CASE(READ_CYCLE_WIDE)
5318   NODE_NAME_CASE(GREVI)
5319   NODE_NAME_CASE(GREVIW)
5320   NODE_NAME_CASE(GORCI)
5321   NODE_NAME_CASE(GORCIW)
5322   NODE_NAME_CASE(SHFLI)
5323   NODE_NAME_CASE(VMV_V_X_VL)
5324   NODE_NAME_CASE(VFMV_V_F_VL)
5325   NODE_NAME_CASE(VMV_X_S)
5326   NODE_NAME_CASE(SPLAT_VECTOR_I64)
5327   NODE_NAME_CASE(READ_VLENB)
5328   NODE_NAME_CASE(TRUNCATE_VECTOR)
5329   NODE_NAME_CASE(VSLIDEUP_VL)
5330   NODE_NAME_CASE(VSLIDEDOWN_VL)
5331   NODE_NAME_CASE(VID_VL)
5332   NODE_NAME_CASE(VFNCVT_ROD)
5333   NODE_NAME_CASE(VECREDUCE_ADD)
5334   NODE_NAME_CASE(VECREDUCE_UMAX)
5335   NODE_NAME_CASE(VECREDUCE_SMAX)
5336   NODE_NAME_CASE(VECREDUCE_UMIN)
5337   NODE_NAME_CASE(VECREDUCE_SMIN)
5338   NODE_NAME_CASE(VECREDUCE_AND)
5339   NODE_NAME_CASE(VECREDUCE_OR)
5340   NODE_NAME_CASE(VECREDUCE_XOR)
5341   NODE_NAME_CASE(VECREDUCE_FADD)
5342   NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
5343   NODE_NAME_CASE(ADD_VL)
5344   NODE_NAME_CASE(AND_VL)
5345   NODE_NAME_CASE(MUL_VL)
5346   NODE_NAME_CASE(OR_VL)
5347   NODE_NAME_CASE(SDIV_VL)
5348   NODE_NAME_CASE(SHL_VL)
5349   NODE_NAME_CASE(SREM_VL)
5350   NODE_NAME_CASE(SRA_VL)
5351   NODE_NAME_CASE(SRL_VL)
5352   NODE_NAME_CASE(SUB_VL)
5353   NODE_NAME_CASE(UDIV_VL)
5354   NODE_NAME_CASE(UREM_VL)
5355   NODE_NAME_CASE(XOR_VL)
5356   NODE_NAME_CASE(FADD_VL)
5357   NODE_NAME_CASE(FSUB_VL)
5358   NODE_NAME_CASE(FMUL_VL)
5359   NODE_NAME_CASE(FDIV_VL)
5360   NODE_NAME_CASE(FNEG_VL)
5361   NODE_NAME_CASE(FABS_VL)
5362   NODE_NAME_CASE(FSQRT_VL)
5363   NODE_NAME_CASE(FMA_VL)
5364   NODE_NAME_CASE(SMIN_VL)
5365   NODE_NAME_CASE(SMAX_VL)
5366   NODE_NAME_CASE(UMIN_VL)
5367   NODE_NAME_CASE(UMAX_VL)
5368   NODE_NAME_CASE(MULHS_VL)
5369   NODE_NAME_CASE(MULHU_VL)
5370   NODE_NAME_CASE(SETCC_VL)
5371   NODE_NAME_CASE(VSELECT_VL)
5372   NODE_NAME_CASE(VMAND_VL)
5373   NODE_NAME_CASE(VMOR_VL)
5374   NODE_NAME_CASE(VMXOR_VL)
5375   NODE_NAME_CASE(VMCLR_VL)
5376   NODE_NAME_CASE(VMSET_VL)
5377   NODE_NAME_CASE(VRGATHER_VX_VL)
5378   NODE_NAME_CASE(VLE_VL)
5379   NODE_NAME_CASE(VSE_VL)
5380   }
5381   // clang-format on
5382   return nullptr;
5383 #undef NODE_NAME_CASE
5384 }
5385 
5386 /// getConstraintType - Given a constraint letter, return the type of
5387 /// constraint it is for this target.
5388 RISCVTargetLowering::ConstraintType
5389 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
5390   if (Constraint.size() == 1) {
5391     switch (Constraint[0]) {
5392     default:
5393       break;
5394     case 'f':
5395       return C_RegisterClass;
5396     case 'I':
5397     case 'J':
5398     case 'K':
5399       return C_Immediate;
5400     case 'A':
5401       return C_Memory;
5402     }
5403   }
5404   return TargetLowering::getConstraintType(Constraint);
5405 }
5406 
5407 std::pair<unsigned, const TargetRegisterClass *>
5408 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5409                                                   StringRef Constraint,
5410                                                   MVT VT) const {
5411   // First, see if this is a constraint that directly corresponds to a
5412   // RISCV register class.
5413   if (Constraint.size() == 1) {
5414     switch (Constraint[0]) {
5415     case 'r':
5416       return std::make_pair(0U, &RISCV::GPRRegClass);
5417     case 'f':
5418       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
5419         return std::make_pair(0U, &RISCV::FPR16RegClass);
5420       if (Subtarget.hasStdExtF() && VT == MVT::f32)
5421         return std::make_pair(0U, &RISCV::FPR32RegClass);
5422       if (Subtarget.hasStdExtD() && VT == MVT::f64)
5423         return std::make_pair(0U, &RISCV::FPR64RegClass);
5424       break;
5425     default:
5426       break;
5427     }
5428   }
5429 
5430   // Clang will correctly decode the usage of register name aliases into their
5431   // official names. However, other frontends like `rustc` do not. This allows
5432   // users of these frontends to use the ABI names for registers in LLVM-style
5433   // register constraints.
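  // For example, the constraint "{a0}" (in any case) resolves to RISCV::X10
  // through the table below.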
5434   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
5435                                .Case("{zero}", RISCV::X0)
5436                                .Case("{ra}", RISCV::X1)
5437                                .Case("{sp}", RISCV::X2)
5438                                .Case("{gp}", RISCV::X3)
5439                                .Case("{tp}", RISCV::X4)
5440                                .Case("{t0}", RISCV::X5)
5441                                .Case("{t1}", RISCV::X6)
5442                                .Case("{t2}", RISCV::X7)
5443                                .Cases("{s0}", "{fp}", RISCV::X8)
5444                                .Case("{s1}", RISCV::X9)
5445                                .Case("{a0}", RISCV::X10)
5446                                .Case("{a1}", RISCV::X11)
5447                                .Case("{a2}", RISCV::X12)
5448                                .Case("{a3}", RISCV::X13)
5449                                .Case("{a4}", RISCV::X14)
5450                                .Case("{a5}", RISCV::X15)
5451                                .Case("{a6}", RISCV::X16)
5452                                .Case("{a7}", RISCV::X17)
5453                                .Case("{s2}", RISCV::X18)
5454                                .Case("{s3}", RISCV::X19)
5455                                .Case("{s4}", RISCV::X20)
5456                                .Case("{s5}", RISCV::X21)
5457                                .Case("{s6}", RISCV::X22)
5458                                .Case("{s7}", RISCV::X23)
5459                                .Case("{s8}", RISCV::X24)
5460                                .Case("{s9}", RISCV::X25)
5461                                .Case("{s10}", RISCV::X26)
5462                                .Case("{s11}", RISCV::X27)
5463                                .Case("{t3}", RISCV::X28)
5464                                .Case("{t4}", RISCV::X29)
5465                                .Case("{t5}", RISCV::X30)
5466                                .Case("{t6}", RISCV::X31)
5467                                .Default(RISCV::NoRegister);
5468   if (XRegFromAlias != RISCV::NoRegister)
5469     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
5470 
5471   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
5472   // TableGen record rather than the AsmName to choose registers for InlineAsm
5473   // constraints, and since we want to match those names to the widest
5474   // floating-point register type available, manually select FP registers here.
5475   //
5476   // The second case is the ABI name of the register, so that frontends can also
5477   // use the ABI names in register constraint lists.
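  // For example, both "{f10}" and "{fa0}" select F10_F, upgraded to F10_D
  // below when the D extension is available.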
5478   if (Subtarget.hasStdExtF()) {
5479     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
5480                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
5481                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
5482                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
5483                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
5484                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
5485                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
5486                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
5487                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
5488                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
5489                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
5490                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
5491                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
5492                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
5493                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
5494                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
5495                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
5496                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
5497                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
5498                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
5499                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
5500                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
5501                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
5502                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
5503                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
5504                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
5505                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
5506                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
5507                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
5508                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
5509                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
5510                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
5511                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
5512                         .Default(RISCV::NoRegister);
5513     if (FReg != RISCV::NoRegister) {
5514       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
5515       if (Subtarget.hasStdExtD()) {
5516         unsigned RegNo = FReg - RISCV::F0_F;
5517         unsigned DReg = RISCV::F0_D + RegNo;
5518         return std::make_pair(DReg, &RISCV::FPR64RegClass);
5519       }
5520       return std::make_pair(FReg, &RISCV::FPR32RegClass);
5521     }
5522   }
5523 
5524   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5525 }
5526 
5527 unsigned
5528 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
5529   // Currently only support length 1 constraints.
5530   if (ConstraintCode.size() == 1) {
5531     switch (ConstraintCode[0]) {
5532     case 'A':
5533       return InlineAsm::Constraint_A;
5534     default:
5535       break;
5536     }
5537   }
5538 
5539   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
5540 }
5541 
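// Lower the target-specific immediate constraints for inline asm operands:
// 'I' accepts a 12-bit signed immediate, 'J' only the integer zero, and 'K' a
// 5-bit unsigned immediate. As an illustrative example, an operand written as
//   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(src), "I"(42));
// reaches this hook with Op being the constant 42 under the 'I' constraint.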
5542 void RISCVTargetLowering::LowerAsmOperandForConstraint(
5543     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5544     SelectionDAG &DAG) const {
5545   // Currently only support length 1 constraints.
5546   if (Constraint.length() == 1) {
5547     switch (Constraint[0]) {
5548     case 'I':
5549       // Validate & create a 12-bit signed immediate operand.
5550       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5551         uint64_t CVal = C->getSExtValue();
5552         if (isInt<12>(CVal))
5553           Ops.push_back(
5554               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
5555       }
5556       return;
5557     case 'J':
5558       // Validate & create an integer zero operand.
5559       if (auto *C = dyn_cast<ConstantSDNode>(Op))
5560         if (C->getZExtValue() == 0)
5561           Ops.push_back(
5562               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
5563       return;
5564     case 'K':
5565       // Validate & create a 5-bit unsigned immediate operand.
5566       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5567         uint64_t CVal = C->getZExtValue();
5568         if (isUInt<5>(CVal))
5569           Ops.push_back(
5570               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
5571       }
5572       return;
5573     default:
5574       break;
5575     }
5576   }
5577   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5578 }
5579 
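// Atomic loads and stores use a fence-based lowering: emitLeadingFence places
// a fence before seq_cst loads and before release-or-stronger stores, and
// emitTrailingFence places an acquire fence after acquire-or-stronger loads.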
5580 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
5581                                                    Instruction *Inst,
5582                                                    AtomicOrdering Ord) const {
5583   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
5584     return Builder.CreateFence(Ord);
5585   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
5586     return Builder.CreateFence(AtomicOrdering::Release);
5587   return nullptr;
5588 }
5589 
5590 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
5591                                                     Instruction *Inst,
5592                                                     AtomicOrdering Ord) const {
5593   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
5594     return Builder.CreateFence(AtomicOrdering::Acquire);
5595   return nullptr;
5596 }
5597 
5598 TargetLowering::AtomicExpansionKind
5599 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5600   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
5601   // point operations can't be used in an lr/sc sequence without breaking the
5602   // forward-progress guarantee.
5603   if (AI->isFloatingPointOperation())
5604     return AtomicExpansionKind::CmpXChg;
5605 
5606   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5607   if (Size == 8 || Size == 16)
5608     return AtomicExpansionKind::MaskedIntrinsic;
5609   return AtomicExpansionKind::None;
5610 }
5611 
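// Map an AtomicRMW binary operation to the corresponding
// llvm.riscv.masked.atomicrmw.* intrinsic for the given XLEN.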
5612 static Intrinsic::ID
5613 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
5614   if (XLen == 32) {
5615     switch (BinOp) {
5616     default:
5617       llvm_unreachable("Unexpected AtomicRMW BinOp");
5618     case AtomicRMWInst::Xchg:
5619       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
5620     case AtomicRMWInst::Add:
5621       return Intrinsic::riscv_masked_atomicrmw_add_i32;
5622     case AtomicRMWInst::Sub:
5623       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
5624     case AtomicRMWInst::Nand:
5625       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
5626     case AtomicRMWInst::Max:
5627       return Intrinsic::riscv_masked_atomicrmw_max_i32;
5628     case AtomicRMWInst::Min:
5629       return Intrinsic::riscv_masked_atomicrmw_min_i32;
5630     case AtomicRMWInst::UMax:
5631       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
5632     case AtomicRMWInst::UMin:
5633       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
5634     }
5635   }
5636 
5637   if (XLen == 64) {
5638     switch (BinOp) {
5639     default:
5640       llvm_unreachable("Unexpected AtomicRMW BinOp");
5641     case AtomicRMWInst::Xchg:
5642       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
5643     case AtomicRMWInst::Add:
5644       return Intrinsic::riscv_masked_atomicrmw_add_i64;
5645     case AtomicRMWInst::Sub:
5646       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
5647     case AtomicRMWInst::Nand:
5648       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
5649     case AtomicRMWInst::Max:
5650       return Intrinsic::riscv_masked_atomicrmw_max_i64;
5651     case AtomicRMWInst::Min:
5652       return Intrinsic::riscv_masked_atomicrmw_min_i64;
5653     case AtomicRMWInst::UMax:
5654       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
5655     case AtomicRMWInst::UMin:
5656       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
5657     }
5658   }
5659 
5660   llvm_unreachable("Unexpected XLen");
5661 }
5662 
5663 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
5664     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5665     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5666   unsigned XLen = Subtarget.getXLen();
5667   Value *Ordering =
5668       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
5669   Type *Tys[] = {AlignedAddr->getType()};
5670   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
5671       AI->getModule(),
5672       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
5673 
5674   if (XLen == 64) {
5675     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5676     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5677     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5678   }
5679 
5680   Value *Result;
5681 
5682   // Must pass the shift amount needed to sign extend the loaded value prior
5683   // to performing a signed comparison for min/max. ShiftAmt is the number of
5684   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
5685   // is the number of bits to left+right shift the value in order to
5686   // sign-extend.
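  // For example (illustrative), an i8 atomicrmw min at byte offset 1 on RV32
  // has ValWidth = 8 and ShiftAmt = 8, giving SextShamt = 32 - 8 - 8 = 16.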
5687   if (AI->getOperation() == AtomicRMWInst::Min ||
5688       AI->getOperation() == AtomicRMWInst::Max) {
5689     const DataLayout &DL = AI->getModule()->getDataLayout();
5690     unsigned ValWidth =
5691         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5692     Value *SextShamt =
5693         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
5694     Result = Builder.CreateCall(LrwOpScwLoop,
5695                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5696   } else {
5697     Result =
5698         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5699   }
5700 
5701   if (XLen == 64)
5702     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5703   return Result;
5704 }
5705 
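// As with atomicrmw, sub-word (i8/i16) cmpxchg is expanded to the masked
// cmpxchg intrinsic; word-sized and larger operations use the native lowering.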
5706 TargetLowering::AtomicExpansionKind
5707 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
5708     AtomicCmpXchgInst *CI) const {
5709   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5710   if (Size == 8 || Size == 16)
5711     return AtomicExpansionKind::MaskedIntrinsic;
5712   return AtomicExpansionKind::None;
5713 }
5714 
5715 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5716     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5717     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5718   unsigned XLen = Subtarget.getXLen();
5719   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
5720   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
5721   if (XLen == 64) {
5722     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5723     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5724     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5725     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
5726   }
5727   Type *Tys[] = {AlignedAddr->getType()};
5728   Function *MaskedCmpXchg =
5729       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5730   Value *Result = Builder.CreateCall(
5731       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
5732   if (XLen == 64)
5733     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5734   return Result;
5735 }
5736 
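// FMA formation is profitable whenever the scalar type is handled in hardware:
// Zfh, F and D each provide fused multiply-add instructions (e.g. fmadd.h,
// fmadd.s, fmadd.d).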
5737 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
5738                                                      EVT VT) const {
5739   VT = VT.getScalarType();
5740 
5741   if (!VT.isSimple())
5742     return false;
5743 
5744   switch (VT.getSimpleVT().SimpleTy) {
5745   case MVT::f16:
5746     return Subtarget.hasStdExtZfh();
5747   case MVT::f32:
5748     return Subtarget.hasStdExtF();
5749   case MVT::f64:
5750     return Subtarget.hasStdExtD();
5751   default:
5752     break;
5753   }
5754 
5755   return false;
5756 }
5757 
5758 Register RISCVTargetLowering::getExceptionPointerRegister(
5759     const Constant *PersonalityFn) const {
5760   return RISCV::X10;
5761 }
5762 
5763 Register RISCVTargetLowering::getExceptionSelectorRegister(
5764     const Constant *PersonalityFn) const {
5765   return RISCV::X11;
5766 }
5767 
5768 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
5769   // Return false to suppress the unnecessary extension when a libcall
5770   // argument or return value is of f32 type under the LP64 ABI.
5771   RISCVABI::ABI ABI = Subtarget.getTargetABI();
5772   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
5773     return false;
5774 
5775   return true;
5776 }
5777 
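// Under the LP64 ABI, 32-bit integer arguments and results are passed
// sign-extended to 64 bits, so force sign extension of i32 libcall values on
// RV64 regardless of their nominal signedness.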
5778 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
5779   if (Subtarget.is64Bit() && Type == MVT::i32)
5780     return true;
5781 
5782   return IsSigned;
5783 }
5784 
5785 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5786                                                  SDValue C) const {
5787   // Check integral scalar types.
5788   if (VT.isScalarInteger()) {
5789     // Omit the optimization if the subtarget has the M extension and the data
5790     // size exceeds XLen.
5791     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
5792       return false;
5793     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
5794       // Break the MUL to a SLLI and an ADD/SUB.
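      // For example, x * 5 can become (x << 2) + x and x * 7 can become
      // (x << 3) - x.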
5795       const APInt &Imm = ConstNode->getAPIntValue();
5796       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
5797           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
5798         return true;
5799       // Omit the following optimization if the subtarget has the M extension
5800       // and the data size is at least XLen.
5801       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
5802         return false;
5803       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
5804       // a pair of LUI/ADDI.
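      // For example (illustrative), Imm = 6144 = 3 << 11 is not a signed
      // 12-bit immediate, but ImmS = 3 satisfies (ImmS + 1).isPowerOf2(), so
      // x * 6144 can become ((x << 2) - x) << 11.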
5805       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
5806         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
5807         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
5808             (1 - ImmS).isPowerOf2())
5809           return true;
5810       }
5811     }
5812   }
5813 
5814   return false;
5815 }
5816 
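// Decide whether a fixed-length vector type should be lowered with RVV: the
// element type must be supported by the enabled extensions, the type must fit
// within the configured LMUL limit, and (for now) the element count must be a
// power of two.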
5817 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
5818   if (!Subtarget.useRVVForFixedLengthVectors())
5819     return false;
5820 
5821   if (!VT.isFixedLengthVector())
5822     return false;
5823 
5824   // Don't use RVV for vectors we cannot scalarize if required.
5825   switch (VT.getVectorElementType().SimpleTy) {
5826   // i1 is supported but has different rules.
5827   default:
5828     return false;
5829   case MVT::i1:
5830     // Masks can only use a single register.
5831     if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
5832       return false;
5833     break;
5834   case MVT::i8:
5835   case MVT::i16:
5836   case MVT::i32:
5837   case MVT::i64:
5838     break;
5839   case MVT::f16:
5840     if (!Subtarget.hasStdExtZfh())
5841       return false;
5842     break;
5843   case MVT::f32:
5844     if (!Subtarget.hasStdExtF())
5845       return false;
5846     break;
5847   case MVT::f64:
5848     if (!Subtarget.hasStdExtD())
5849       return false;
5850     break;
5851   }
5852 
5853   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
5854   // Don't use RVV for types that don't fit.
5855   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
5856     return false;
5857 
5858   // TODO: Perhaps an artificial restriction, but worth having whilst getting
5859   // the base fixed length RVV support in place.
5860   if (!VT.isPow2VectorType())
5861     return false;
5862 
5863   return true;
5864 }
5865 
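// For scalable vector types, report misaligned accesses as allowed (and fast)
// whenever the alignment covers a single element; full vector alignment is not
// required.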
5866 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
5867     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
5868     bool *Fast) const {
5869   if (!VT.isScalableVector())
5870     return false;
5871 
5872   EVT ElemVT = VT.getVectorElementType();
5873   if (Alignment >= ElemVT.getStoreSize()) {
5874     if (Fast)
5875       *Fast = true;
5876     return true;
5877   }
5878 
5879   return false;
5880 }
5881 
5882 #define GET_REGISTER_MATCHER
5883 #include "RISCVGenAsmMatcher.inc"
5884 
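// Resolve a register by name (accepting both architectural and ABI names) for
// uses such as the llvm.read_register intrinsic; only registers that are
// reserved, either always or explicitly by the user, may be obtained this way.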
5885 Register
5886 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
5887                                        const MachineFunction &MF) const {
5888   Register Reg = MatchRegisterAltName(RegName);
5889   if (Reg == RISCV::NoRegister)
5890     Reg = MatchRegisterName(RegName);
5891   if (Reg == RISCV::NoRegister)
5892     report_fatal_error(
5893         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
5894   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
5895   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
5896     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
5897                              StringRef(RegName) + "\"."));
5898   return Reg;
5899 }
5900 
5901 namespace llvm {
5902 namespace RISCVVIntrinsicsTable {
5903 
5904 #define GET_RISCVVIntrinsicsTable_IMPL
5905 #include "RISCVGenSearchableTables.inc"
5906 
5907 } // namespace RISCVVIntrinsicsTable
5908 
5909 } // namespace llvm
5910