1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
// Construct the RISCV target lowering: register the legal register classes
// (GPR, FPR16/32/64 and the RVV vector register groups), record which generic
// ISD operations are Legal/Custom/Expand/Promote for this subtarget's
// extension mix, and configure target-wide lowering policy (alignments, jump
// tables, DAG combines).
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // If a hard-float ABI was requested but the matching FP extension is
  // missing, warn and fall back to the soft-float ABI instead of failing.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
                "doesn't support the F instruction set extension (ignoring "
                          "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  // Reject any ABI not explicitly handled below.
  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Candidate scalable (RVV) vector types, grouped by element type.
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    // Map a scalable VT's known-minimum size in bits to the narrowest vector
    // register group that can hold it (VR, VRM2, VRM4 or VRM8).
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    // FP vector types are only legal when the matching scalar FP extension
    // is available.
    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    // Fixed-length vectors are legalized by mapping them onto scalable RVV
    // register groups based on the LMUL computed for the type.
    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  // i1 loads have no native form; promote extending i1 loads to XLen loads.
  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Zbb provides sext.b/sext.h; without it, in-register sign extension of
  // sub-XLen types must be expanded (shift pairs).
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  // On RV64, custom-lower common 32-bit operations so they can use the *W
  // instruction forms.
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
  }

  // Without the M extension, integer multiply/divide become libcalls.
  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  // With M on RV64, custom-legalize i32 mul and narrow div/rem instead.
  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  // Rotates are only natively available with Zbb/Zbp.
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  // Zbb provides min/max/ctz/clz/cpop instructions.
  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  // Zbt provides funnel shifts and conditional-move style selects.
  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  // Scalar FP condition codes with no single native compare instruction.
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  // Scalar FP operations that are lowered to libcalls for every FP type.
  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  // Addresses of globals, constant pools, etc. are materialized with
  // custom lowering.
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        // Operations below are different for between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS,  VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  // Combine (or x, y) trees into GREV/GORC/SHFL patterns when Zbp is present.
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV())
    setTargetDAGCombine(ISD::FCOPYSIGN);
}
686 
687 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
688                                             LLVMContext &Context,
689                                             EVT VT) const {
690   if (!VT.isVector())
691     return getPointerTy(DL);
692   if (Subtarget.hasStdExtV() &&
693       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
694     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
695   return VT.changeVectorElementTypeToInteger();
696 }
697 
698 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
699                                              const CallInst &I,
700                                              MachineFunction &MF,
701                                              unsigned Intrinsic) const {
702   switch (Intrinsic) {
703   default:
704     return false;
705   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
706   case Intrinsic::riscv_masked_atomicrmw_add_i32:
707   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
708   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
709   case Intrinsic::riscv_masked_atomicrmw_max_i32:
710   case Intrinsic::riscv_masked_atomicrmw_min_i32:
711   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
712   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
713   case Intrinsic::riscv_masked_cmpxchg_i32:
714     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
715     Info.opc = ISD::INTRINSIC_W_CHAIN;
716     Info.memVT = MVT::getVT(PtrTy->getElementType());
717     Info.ptrVal = I.getArgOperand(0);
718     Info.offset = 0;
719     Info.align = Align(4);
720     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
721                  MachineMemOperand::MOVolatile;
722     return true;
723   }
724 }
725 
726 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
727                                                 const AddrMode &AM, Type *Ty,
728                                                 unsigned AS,
729                                                 Instruction *I) const {
730   // No global is ever allowed as a base.
731   if (AM.BaseGV)
732     return false;
733 
734   // Require a 12-bit signed offset.
735   if (!isInt<12>(AM.BaseOffs))
736     return false;
737 
738   switch (AM.Scale) {
739   case 0: // "r+i" or just "i", depending on HasBaseReg.
740     break;
741   case 1:
742     if (!AM.HasBaseReg) // allow "r+i".
743       break;
744     return false; // disallow "r+r" or "r+r+i".
745   default:
746     return false;
747   }
748 
749   return true;
750 }
751 
752 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
753   return isInt<12>(Imm);
754 }
755 
756 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
757   return isInt<12>(Imm);
758 }
759 
760 // On RV32, 64-bit integers are split into their high and low parts and held
761 // in two different registers, so the trunc is free since the low register can
762 // just be used.
763 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
764   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
765     return false;
766   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
767   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
768   return (SrcBits == 64 && DestBits == 32);
769 }
770 
771 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
772   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
773       !SrcVT.isInteger() || !DstVT.isInteger())
774     return false;
775   unsigned SrcBits = SrcVT.getSizeInBits();
776   unsigned DestBits = DstVT.getSizeInBits();
777   return (SrcBits == 64 && DestBits == 32);
778 }
779 
780 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
781   // Zexts are free if they can be combined with a load.
782   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
783     EVT MemVT = LD->getMemoryVT();
784     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
785          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
786         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
787          LD->getExtensionType() == ISD::ZEXTLOAD))
788       return true;
789   }
790 
791   return TargetLowering::isZExtFree(Val, VT2);
792 }
793 
794 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
795   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
796 }
797 
// Speculating a count-trailing-zeros is only considered cheap when the Zbb
// extension is available.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}
801 
// Speculating a count-leading-zeros is only considered cheap when the Zbb
// extension is available.
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}
805 
806 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
807                                        bool ForCodeSize) const {
808   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
809     return false;
810   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
811     return false;
812   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
813     return false;
814   if (Imm.isNegZero())
815     return false;
816   return Imm.isZero();
817 }
818 
819 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
820   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
821          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
822          (VT == MVT::f64 && Subtarget.hasStdExtD());
823 }
824 
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
  if (CC == ISD::SETLT && isOneConstant(RHS)) {
    RHS = LHS;
    // RHS now holds the old LHS, so this builds the zero constant with the
    // original operand's value type.
    LHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }

  // The remaining unsupported compares (GT/LE and their unsigned forms) are
  // canonicalised onto the supported set by swapping the two operands; see
  // getBranchOpcodeForIntCondCode for the codes branches handle directly.
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
857 
// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch); any other code is a lowering bug.
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}
879 
// Map a scalable vector type to its RVV register grouping (LMUL) based on
// the type's known-minimum size in bits, with a 64-bit minimum size
// corresponding to LMUL=1.
RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  // Mask (i1) vectors are sized as if each element were 8 bits wide,
  // matching getContainerForFixedLengthVector's mask handling, so scale
  // their 1-bit elements up accordingly.
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}
905 
// Return the ID of the register class that holds vectors of the given LMUL.
// Fractional LMULs and LMUL=1 all fit in a single vector register (VR);
// larger groupings use the VRM2/VRM4/VRM8 tuple classes.
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}
923 
924 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
925   RISCVVLMUL LMUL = getLMUL(VT);
926   if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
927       LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
928     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
929                   "Unexpected subreg numbering");
930     return RISCV::sub_vrm1_0 + Index;
931   }
932   if (LMUL == RISCVVLMUL::LMUL_2) {
933     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
934                   "Unexpected subreg numbering");
935     return RISCV::sub_vrm2_0 + Index;
936   }
937   if (LMUL == RISCVVLMUL::LMUL_4) {
938     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
939                   "Unexpected subreg numbering");
940     return RISCV::sub_vrm4_0 + Index;
941   }
942   llvm_unreachable("Invalid vector type.");
943 }
944 
945 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
946   if (VT.getVectorElementType() == MVT::i1)
947     return RISCV::VRRegClassID;
948   return getRegClassIDForLMUL(getLMUL(VT));
949 }
950 
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on the class IDs decreasing as LMUL shrinks.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the type, then pick the low or high half depending on which
      // side of the midpoint the element index falls. IsHi (0 or 1) doubles
      // as the subregister piece index at this step.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // Re-base the element index into the half we descended into.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
986 
987 // Return the largest legal scalable vector type that matches VT's element type.
988 MVT RISCVTargetLowering::getContainerForFixedLengthVector(
989     const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
990   assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
991          "Expected legal fixed length vector!");
992 
993   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
994   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
995 
996   MVT EltVT = VT.getVectorElementType();
997   switch (EltVT.SimpleTy) {
998   default:
999     llvm_unreachable("unexpected element type for RVV container");
1000   case MVT::i1: {
1001     // Masks are calculated assuming 8-bit elements since that's when we need
1002     // the most elements.
1003     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
1004     return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
1005   }
1006   case MVT::i8:
1007   case MVT::i16:
1008   case MVT::i32:
1009   case MVT::i64:
1010   case MVT::f16:
1011   case MVT::f32:
1012   case MVT::f64: {
1013     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
1014     return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
1015   }
1016   }
1017 }
1018 
// Convenience overload: obtain the TargetLowering from the DAG and forward.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}
1024 
// Member overload: use this lowering object and its own subtarget.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
1028 
1029 // Grow V to consume an entire RVV register.
1030 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1031                                        const RISCVSubtarget &Subtarget) {
1032   assert(VT.isScalableVector() &&
1033          "Expected to convert into a scalable vector!");
1034   assert(V.getValueType().isFixedLengthVector() &&
1035          "Expected a fixed length vector operand!");
1036   SDLoc DL(V);
1037   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1038   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1039 }
1040 
1041 // Shrink V so it's just big enough to maintain a VT's worth of data.
1042 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1043                                          const RISCVSubtarget &Subtarget) {
1044   assert(VT.isFixedLengthVector() &&
1045          "Expected to convert into a fixed length vector!");
1046   assert(V.getValueType().isScalableVector() &&
1047          "Expected a scalable vector operand!");
1048   SDLoc DL(V);
1049   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1050   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1051 }
1052 
1053 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1054 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1055 // the vector type that it is contained in.
1056 static std::pair<SDValue, SDValue>
1057 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1058                 const RISCVSubtarget &Subtarget) {
1059   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1060   MVT XLenVT = Subtarget.getXLenVT();
1061   SDValue VL = VecVT.isFixedLengthVector()
1062                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1063                    : DAG.getRegister(RISCV::X0, XLenVT);
1064   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1065   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1066   return {Mask, VL};
1067 }
1068 
// As above but assuming the given type is a scalable vector type. The type
// then serves as its own container.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
1076 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally false for all types/value counts; see the rationale
  // above.
  return false;
}
1089 
1090 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1091   // Only splats are currently supported.
1092   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1093     return true;
1094 
1095   return false;
1096 }
1097 
// Lower a fixed-length BUILD_VECTOR to RVV "VL" nodes. Three patterns are
// handled: all-zeros/all-ones i1 masks (vmclr/vmset), splats of a single
// value (vmv.v.x / vfmv.v.f), and the identity index sequence (vid).
// Anything else returns SDValue() to fall back to default expansion.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Mask vectors: only the all-zeros and all-ones cases are supported.
  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  // Splats: use an FP or integer scalar-to-vector move as appropriate.
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    // Every operand must be undef or the constant equal to its position.
    for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
      IsVID &= Op.getOperand(i).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(i)) &&
                Op.getConstantOperandVal(i) == i);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  return SDValue();
}
1148 
// Lower a fixed-length VECTOR_SHUFFLE. Only splats of a lane of the first
// operand are currently handled, via a VL-predicated vrgather.vx; any other
// shuffle returns SDValue() for default handling.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    // NOTE(review): a negative splat index presumably denotes an undef
    // splat lane, which is deliberately left unhandled here — confirm.
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      // Broadcast element `Lane` of V1 across the result.
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}
1177 
// Emit an FP extend or round of Op to VT. Scalable vectors use the generic
// DAG helper; fixed-length vectors use the VL-predicated RVV nodes, choosing
// extend vs. round by whether the container type is wider than the operand.
// Note: in the fixed-length case the result has type ContainerVT and the
// caller is responsible for converting back to a fixed-length value.
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ? RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
1192 
1193 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1194                                             SelectionDAG &DAG) const {
1195   switch (Op.getOpcode()) {
1196   default:
1197     report_fatal_error("unimplemented operand");
1198   case ISD::GlobalAddress:
1199     return lowerGlobalAddress(Op, DAG);
1200   case ISD::BlockAddress:
1201     return lowerBlockAddress(Op, DAG);
1202   case ISD::ConstantPool:
1203     return lowerConstantPool(Op, DAG);
1204   case ISD::JumpTable:
1205     return lowerJumpTable(Op, DAG);
1206   case ISD::GlobalTLSAddress:
1207     return lowerGlobalTLSAddress(Op, DAG);
1208   case ISD::SELECT:
1209     return lowerSELECT(Op, DAG);
1210   case ISD::BRCOND:
1211     return lowerBRCOND(Op, DAG);
1212   case ISD::VASTART:
1213     return lowerVASTART(Op, DAG);
1214   case ISD::FRAMEADDR:
1215     return lowerFRAMEADDR(Op, DAG);
1216   case ISD::RETURNADDR:
1217     return lowerRETURNADDR(Op, DAG);
1218   case ISD::SHL_PARTS:
1219     return lowerShiftLeftParts(Op, DAG);
1220   case ISD::SRA_PARTS:
1221     return lowerShiftRightParts(Op, DAG, true);
1222   case ISD::SRL_PARTS:
1223     return lowerShiftRightParts(Op, DAG, false);
1224   case ISD::BITCAST: {
1225     SDValue Op0 = Op.getOperand(0);
1226     // We can handle fixed length vector bitcasts with a simple replacement
1227     // in isel.
1228     if (Op.getValueType().isFixedLengthVector()) {
1229       if (Op0.getValueType().isFixedLengthVector())
1230         return Op;
1231       return SDValue();
1232     }
1233     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
1234             Subtarget.hasStdExtZfh()) &&
1235            "Unexpected custom legalisation");
1236     SDLoc DL(Op);
1237     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
1238       if (Op0.getValueType() != MVT::i16)
1239         return SDValue();
1240       SDValue NewOp0 =
1241           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
1242       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1243       return FPConv;
1244     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
1245                Subtarget.hasStdExtF()) {
1246       if (Op0.getValueType() != MVT::i32)
1247         return SDValue();
1248       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1249       SDValue FPConv =
1250           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1251       return FPConv;
1252     }
1253     return SDValue();
1254   }
1255   case ISD::INTRINSIC_WO_CHAIN:
1256     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1257   case ISD::INTRINSIC_W_CHAIN:
1258     return LowerINTRINSIC_W_CHAIN(Op, DAG);
1259   case ISD::BSWAP:
1260   case ISD::BITREVERSE: {
1261     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
1262     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1263     MVT VT = Op.getSimpleValueType();
1264     SDLoc DL(Op);
1265     // Start with the maximum immediate value which is the bitwidth - 1.
1266     unsigned Imm = VT.getSizeInBits() - 1;
1267     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1268     if (Op.getOpcode() == ISD::BSWAP)
1269       Imm &= ~0x7U;
1270     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
1271                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
1272   }
1273   case ISD::FSHL:
1274   case ISD::FSHR: {
1275     MVT VT = Op.getSimpleValueType();
1276     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1277     SDLoc DL(Op);
1278     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
1279     // use log(XLen) bits. Mask the shift amount accordingly.
1280     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1281     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1282                                 DAG.getConstant(ShAmtWidth, DL, VT));
1283     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1284     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1285   }
1286   case ISD::TRUNCATE: {
1287     SDLoc DL(Op);
1288     MVT VT = Op.getSimpleValueType();
1289     // Only custom-lower vector truncates
1290     if (!VT.isVector())
1291       return Op;
1292 
1293     // Truncates to mask types are handled differently
1294     if (VT.getVectorElementType() == MVT::i1)
1295       return lowerVectorMaskTrunc(Op, DAG);
1296 
1297     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1298     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
1299     // truncate by one power of two at a time.
1300     MVT DstEltVT = VT.getVectorElementType();
1301 
1302     SDValue Src = Op.getOperand(0);
1303     MVT SrcVT = Src.getSimpleValueType();
1304     MVT SrcEltVT = SrcVT.getVectorElementType();
1305 
1306     assert(DstEltVT.bitsLT(SrcEltVT) &&
1307            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1308            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1309            "Unexpected vector truncate lowering");
1310 
1311     MVT ContainerVT = SrcVT;
1312     if (SrcVT.isFixedLengthVector()) {
1313       ContainerVT = getContainerForFixedLengthVector(SrcVT);
1314       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1315     }
1316 
1317     SDValue Result = Src;
1318     SDValue Mask, VL;
1319     std::tie(Mask, VL) =
1320         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
1321     LLVMContext &Context = *DAG.getContext();
1322     const ElementCount Count = ContainerVT.getVectorElementCount();
1323     do {
1324       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
1325       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1326       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
1327                            Mask, VL);
1328     } while (SrcEltVT != DstEltVT);
1329 
1330     if (SrcVT.isFixedLengthVector())
1331       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
1332 
1333     return Result;
1334   }
1335   case ISD::ANY_EXTEND:
1336   case ISD::ZERO_EXTEND:
1337     if (Op.getOperand(0).getValueType().isVector() &&
1338         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1339       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1340     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
1341   case ISD::SIGN_EXTEND:
1342     if (Op.getOperand(0).getValueType().isVector() &&
1343         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1344       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1345     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
1346   case ISD::SPLAT_VECTOR_PARTS:
1347     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
1348   case ISD::INSERT_VECTOR_ELT:
1349     return lowerINSERT_VECTOR_ELT(Op, DAG);
1350   case ISD::EXTRACT_VECTOR_ELT:
1351     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1352   case ISD::VSCALE: {
1353     MVT VT = Op.getSimpleValueType();
1354     SDLoc DL(Op);
1355     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1356     // We define our scalable vector types for lmul=1 to use a 64 bit known
1357     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
1358     // vscale as VLENB / 8.
1359     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1360                                  DAG.getConstant(3, DL, VT));
1361     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1362   }
1363   case ISD::FP_EXTEND: {
1364     // RVV can only do fp_extend to types double the size as the source. We
1365     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1366     // via f32.
1367     SDLoc DL(Op);
1368     MVT VT = Op.getSimpleValueType();
1369     SDValue Src = Op.getOperand(0);
1370     MVT SrcVT = Src.getSimpleValueType();
1371 
1372     // Prepare any fixed-length vector operands.
1373     MVT ContainerVT = VT;
1374     if (SrcVT.isFixedLengthVector()) {
1375       ContainerVT = getContainerForFixedLengthVector(VT);
1376       MVT SrcContainerVT =
1377           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
1378       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1379     }
1380 
1381     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1382         SrcVT.getVectorElementType() != MVT::f16) {
1383       // For scalable vectors, we only need to close the gap between
1384       // vXf16->vXf64.
1385       if (!VT.isFixedLengthVector())
1386         return Op;
1387       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
1388       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1389       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1390     }
1391 
1392     MVT InterVT = VT.changeVectorElementType(MVT::f32);
1393     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
1394     SDValue IntermediateExtend = getRVVFPExtendOrRound(
1395         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
1396 
1397     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
1398                                            DL, DAG, Subtarget);
1399     if (VT.isFixedLengthVector())
1400       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
1401     return Extend;
1402   }
1403   case ISD::FP_ROUND: {
1404     // RVV can only do fp_round to types half the size as the source. We
1405     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1406     // conversion instruction.
1407     SDLoc DL(Op);
1408     MVT VT = Op.getSimpleValueType();
1409     SDValue Src = Op.getOperand(0);
1410     MVT SrcVT = Src.getSimpleValueType();
1411 
1412     // Prepare any fixed-length vector operands.
1413     MVT ContainerVT = VT;
1414     if (VT.isFixedLengthVector()) {
1415       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1416       ContainerVT =
1417           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1418       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1419     }
1420 
1421     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1422         SrcVT.getVectorElementType() != MVT::f64) {
1423       // For scalable vectors, we only need to close the gap between
1424       // vXf64<->vXf16.
1425       if (!VT.isFixedLengthVector())
1426         return Op;
1427       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
1428       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1429       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1430     }
1431 
1432     SDValue Mask, VL;
1433     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1434 
1435     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
1436     SDValue IntermediateRound =
1437         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
1438     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
1439                                           DL, DAG, Subtarget);
1440 
1441     if (VT.isFixedLengthVector())
1442       return convertFromScalableVector(VT, Round, DAG, Subtarget);
1443     return Round;
1444   }
1445   case ISD::FP_TO_SINT:
1446   case ISD::FP_TO_UINT:
1447   case ISD::SINT_TO_FP:
1448   case ISD::UINT_TO_FP: {
1449     // RVV can only do fp<->int conversions to types half/double the size as
1450     // the source. We custom-lower any conversions that do two hops into
1451     // sequences.
1452     MVT VT = Op.getSimpleValueType();
1453     if (!VT.isVector())
1454       return Op;
1455     SDLoc DL(Op);
1456     SDValue Src = Op.getOperand(0);
1457     MVT EltVT = VT.getVectorElementType();
1458     MVT SrcVT = Src.getSimpleValueType();
1459     MVT SrcEltVT = SrcVT.getVectorElementType();
1460     unsigned EltSize = EltVT.getSizeInBits();
1461     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1462     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1463            "Unexpected vector element types");
1464 
1465     bool IsInt2FP = SrcEltVT.isInteger();
1466     // Widening conversions
1467     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1468       if (IsInt2FP) {
1469         // Do a regular integer sign/zero extension then convert to float.
1470         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1471                                       VT.getVectorElementCount());
1472         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1473                                  ? ISD::ZERO_EXTEND
1474                                  : ISD::SIGN_EXTEND;
1475         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1476         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1477       }
1478       // FP2Int
1479       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1480       // Do one doubling fp_extend then complete the operation by converting
1481       // to int.
1482       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1483       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1484       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1485     }
1486 
1487     // Narrowing conversions
1488     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1489       if (IsInt2FP) {
1490         // One narrowing int_to_fp, then an fp_round.
1491         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1492         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1493         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1494         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1495       }
1496       // FP2Int
1497       // One narrowing fp_to_int, then truncate the integer. If the float isn't
1498       // representable by the integer, the result is poison.
1499       MVT IVecVT =
1500           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1501                            VT.getVectorElementCount());
1502       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1503       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1504     }
1505 
1506     // Scalable vectors can exit here. Patterns will handle equally-sized
1507     // conversions halving/doubling ones.
1508     if (!VT.isFixedLengthVector())
1509       return Op;
1510 
1511     // For fixed-length vectors we lower to a custom "VL" node.
1512     unsigned RVVOpc = 0;
1513     switch (Op.getOpcode()) {
1514     default:
1515       llvm_unreachable("Impossible opcode");
1516     case ISD::FP_TO_SINT:
1517       RVVOpc = RISCVISD::FP_TO_SINT_VL;
1518       break;
1519     case ISD::FP_TO_UINT:
1520       RVVOpc = RISCVISD::FP_TO_UINT_VL;
1521       break;
1522     case ISD::SINT_TO_FP:
1523       RVVOpc = RISCVISD::SINT_TO_FP_VL;
1524       break;
1525     case ISD::UINT_TO_FP:
1526       RVVOpc = RISCVISD::UINT_TO_FP_VL;
1527       break;
1528     }
1529 
1530     MVT ContainerVT, SrcContainerVT;
1531     // Derive the reference container type from the larger vector type.
1532     if (SrcEltSize > EltSize) {
1533       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1534       ContainerVT =
1535           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1536     } else {
1537       ContainerVT = getContainerForFixedLengthVector(VT);
1538       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
1539     }
1540 
1541     SDValue Mask, VL;
1542     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1543 
1544     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1545     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
1546     return convertFromScalableVector(VT, Src, DAG, Subtarget);
1547   }
1548   case ISD::VECREDUCE_ADD:
1549   case ISD::VECREDUCE_UMAX:
1550   case ISD::VECREDUCE_SMAX:
1551   case ISD::VECREDUCE_UMIN:
1552   case ISD::VECREDUCE_SMIN:
1553   case ISD::VECREDUCE_AND:
1554   case ISD::VECREDUCE_OR:
1555   case ISD::VECREDUCE_XOR:
1556     return lowerVECREDUCE(Op, DAG);
1557   case ISD::VECREDUCE_FADD:
1558   case ISD::VECREDUCE_SEQ_FADD:
1559     return lowerFPVECREDUCE(Op, DAG);
1560   case ISD::INSERT_SUBVECTOR:
1561     return lowerINSERT_SUBVECTOR(Op, DAG);
1562   case ISD::EXTRACT_SUBVECTOR:
1563     return lowerEXTRACT_SUBVECTOR(Op, DAG);
1564   case ISD::VECTOR_REVERSE:
1565     return lowerVECTOR_REVERSE(Op, DAG);
1566   case ISD::BUILD_VECTOR:
1567     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1568   case ISD::VECTOR_SHUFFLE:
1569     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
1570   case ISD::CONCAT_VECTORS: {
1571     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
1572     // better than going through the stack, as the default expansion does.
1573     SDLoc DL(Op);
1574     MVT VT = Op.getSimpleValueType();
1575     assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering");
1576     unsigned NumOpElts =
1577         Op.getOperand(0).getSimpleValueType().getVectorNumElements();
1578     SDValue Vec = DAG.getUNDEF(VT);
1579     for (const auto &OpIdx : enumerate(Op->ops()))
1580       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
1581                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
1582     return Vec;
1583   }
1584   case ISD::LOAD:
1585     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1586   case ISD::STORE:
1587     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1588   case ISD::SETCC:
1589     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
1590   case ISD::ADD:
1591     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1592   case ISD::SUB:
1593     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1594   case ISD::MUL:
1595     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1596   case ISD::MULHS:
1597     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
1598   case ISD::MULHU:
1599     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
1600   case ISD::AND:
1601     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
1602                                               RISCVISD::AND_VL);
1603   case ISD::OR:
1604     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
1605                                               RISCVISD::OR_VL);
1606   case ISD::XOR:
1607     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
1608                                               RISCVISD::XOR_VL);
1609   case ISD::SDIV:
1610     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1611   case ISD::SREM:
1612     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1613   case ISD::UDIV:
1614     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1615   case ISD::UREM:
1616     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1617   case ISD::SHL:
1618     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1619   case ISD::SRA:
1620     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1621   case ISD::SRL:
1622     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1623   case ISD::FADD:
1624     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1625   case ISD::FSUB:
1626     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1627   case ISD::FMUL:
1628     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1629   case ISD::FDIV:
1630     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1631   case ISD::FNEG:
1632     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1633   case ISD::FABS:
1634     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
1635   case ISD::FSQRT:
1636     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
1637   case ISD::FMA:
1638     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1639   case ISD::SMIN:
1640     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
1641   case ISD::SMAX:
1642     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
1643   case ISD::UMIN:
1644     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
1645   case ISD::UMAX:
1646     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
1647   case ISD::ABS:
1648     return lowerABS(Op, DAG);
1649   case ISD::VSELECT:
1650     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
1651   case ISD::FCOPYSIGN:
1652     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
1653   }
1654 }
1655 
1656 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1657                              SelectionDAG &DAG, unsigned Flags) {
1658   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1659 }
1660 
1661 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1662                              SelectionDAG &DAG, unsigned Flags) {
1663   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1664                                    Flags);
1665 }
1666 
1667 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1668                              SelectionDAG &DAG, unsigned Flags) {
1669   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1670                                    N->getOffset(), Flags);
1671 }
1672 
1673 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1674                              SelectionDAG &DAG, unsigned Flags) {
1675   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1676 }
1677 
/// Materialize the address of a symbol-like node \p N (global address, block
/// address, constant pool entry or jump table) with an instruction sequence
/// appropriate for the current relocation model and code model. The exact
/// pseudo/machine-node patterns emitted here are expanded later, so they must
/// be produced verbatim.
///
/// \param IsLocal under PIC, true selects direct PC-relative addressing and
///        false selects a GOT load. Ignored for static relocation.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  // Addresses are always pointer-width values.
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
1718 
1719 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1720                                                 SelectionDAG &DAG) const {
1721   SDLoc DL(Op);
1722   EVT Ty = Op.getValueType();
1723   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1724   int64_t Offset = N->getOffset();
1725   MVT XLenVT = Subtarget.getXLenVT();
1726 
1727   const GlobalValue *GV = N->getGlobal();
1728   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1729   SDValue Addr = getAddr(N, DAG, IsLocal);
1730 
1731   // In order to maximise the opportunity for common subexpression elimination,
1732   // emit a separate ADD node for the global address offset instead of folding
1733   // it in the global address node. Later peephole optimisations may choose to
1734   // fold it back in when profitable.
1735   if (Offset != 0)
1736     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1737                        DAG.getConstant(Offset, DL, XLenVT));
1738   return Addr;
1739 }
1740 
1741 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1742                                                SelectionDAG &DAG) const {
1743   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1744 
1745   return getAddr(N, DAG);
1746 }
1747 
1748 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1749                                                SelectionDAG &DAG) const {
1750   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1751 
1752   return getAddr(N, DAG);
1753 }
1754 
1755 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1756                                             SelectionDAG &DAG) const {
1757   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1758 
1759   return getAddr(N, DAG);
1760 }
1761 
/// Lower the address of a thread-local global for the static TLS models:
/// initial-exec when \p UseGOT is true, local-exec when it is false. Both
/// sequences ultimately add the thread pointer register (tp/x4) to a
/// symbol-derived offset. The machine-node patterns emitted here are matched
/// verbatim by later expansion, so their shape must not change.
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  // lui %tprel_hi(sym), then the PseudoAddTPRel carrying the %tprel_add
  // relocation (needed for linker relaxation), then the final %tprel_lo addi.
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
1802 
/// Lower the address of a thread-local global for the general/local-dynamic
/// TLS models by materializing the GOT slot address and calling
/// __tls_get_addr with it.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // Pointer-width integer IR type used to describe the libcall's argument and
  // return value.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call: a single pointer-sized argument
  // holding the GOT slot address computed above.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // The call returns the symbol's address; the chain result is dropped.
  return LowerCallTo(CLI).first;
}
1834 
1835 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1836                                                    SelectionDAG &DAG) const {
1837   SDLoc DL(Op);
1838   EVT Ty = Op.getValueType();
1839   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1840   int64_t Offset = N->getOffset();
1841   MVT XLenVT = Subtarget.getXLenVT();
1842 
1843   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1844 
1845   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1846       CallingConv::GHC)
1847     report_fatal_error("In GHC calling convention TLS is not supported");
1848 
1849   SDValue Addr;
1850   switch (Model) {
1851   case TLSModel::LocalExec:
1852     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1853     break;
1854   case TLSModel::InitialExec:
1855     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1856     break;
1857   case TLSModel::LocalDynamic:
1858   case TLSModel::GeneralDynamic:
1859     Addr = getDynamicTLSAddr(N, DAG);
1860     break;
1861   }
1862 
1863   // In order to maximise the opportunity for common subexpression elimination,
1864   // emit a separate ADD node for the global address offset instead of folding
1865   // it in the global address node. Later peephole optimisations may choose to
1866   // fold it back in when profitable.
1867   if (Offset != 0)
1868     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1869                        DAG.getConstant(Offset, DL, XLenVT));
1870   return Addr;
1871 }
1872 
1873 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1874   SDValue CondV = Op.getOperand(0);
1875   SDValue TrueV = Op.getOperand(1);
1876   SDValue FalseV = Op.getOperand(2);
1877   SDLoc DL(Op);
1878   MVT XLenVT = Subtarget.getXLenVT();
1879 
1880   // If the result type is XLenVT and CondV is the output of a SETCC node
1881   // which also operated on XLenVT inputs, then merge the SETCC node into the
1882   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1883   // compare+branch instructions. i.e.:
1884   // (select (setcc lhs, rhs, cc), truev, falsev)
1885   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1886   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1887       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1888     SDValue LHS = CondV.getOperand(0);
1889     SDValue RHS = CondV.getOperand(1);
1890     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1891     ISD::CondCode CCVal = CC->get();
1892 
1893     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1894 
1895     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1896     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1897     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1898   }
1899 
1900   // Otherwise:
1901   // (select condv, truev, falsev)
1902   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1903   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1904   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1905 
1906   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1907 
1908   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1909 }
1910 
1911 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1912   SDValue CondV = Op.getOperand(1);
1913   SDLoc DL(Op);
1914   MVT XLenVT = Subtarget.getXLenVT();
1915 
1916   if (CondV.getOpcode() == ISD::SETCC &&
1917       CondV.getOperand(0).getValueType() == XLenVT) {
1918     SDValue LHS = CondV.getOperand(0);
1919     SDValue RHS = CondV.getOperand(1);
1920     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1921 
1922     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1923 
1924     SDValue TargetCC = DAG.getCondCode(CCVal);
1925     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
1926                        LHS, RHS, TargetCC, Op.getOperand(2));
1927   }
1928 
1929   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
1930                      CondV, DAG.getConstant(0, DL, XLenVT),
1931                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1932 }
1933 
1934 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1935   MachineFunction &MF = DAG.getMachineFunction();
1936   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1937 
1938   SDLoc DL(Op);
1939   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1940                                  getPointerTy(MF.getDataLayout()));
1941 
1942   // vastart just stores the address of the VarArgsFrameIndex slot into the
1943   // memory location argument.
1944   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1945   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1946                       MachinePointerInfo(SV));
1947 }
1948 
1949 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1950                                             SelectionDAG &DAG) const {
1951   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1952   MachineFunction &MF = DAG.getMachineFunction();
1953   MachineFrameInfo &MFI = MF.getFrameInfo();
1954   MFI.setFrameAddressIsTaken(true);
1955   Register FrameReg = RI.getFrameRegister(MF);
1956   int XLenInBytes = Subtarget.getXLen() / 8;
1957 
1958   EVT VT = Op.getValueType();
1959   SDLoc DL(Op);
1960   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1961   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1962   while (Depth--) {
1963     int Offset = -(XLenInBytes * 2);
1964     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1965                               DAG.getIntPtrConstant(Offset, DL));
1966     FrameAddr =
1967         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1968   }
1969   return FrameAddr;
1970 }
1971 
1972 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1973                                              SelectionDAG &DAG) const {
1974   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1975   MachineFunction &MF = DAG.getMachineFunction();
1976   MachineFrameInfo &MFI = MF.getFrameInfo();
1977   MFI.setReturnAddressIsTaken(true);
1978   MVT XLenVT = Subtarget.getXLenVT();
1979   int XLenInBytes = Subtarget.getXLen() / 8;
1980 
1981   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1982     return SDValue();
1983 
1984   EVT VT = Op.getValueType();
1985   SDLoc DL(Op);
1986   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1987   if (Depth) {
1988     int Off = -XLenInBytes;
1989     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1990     SDValue Offset = DAG.getConstant(Off, DL, VT);
1991     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1992                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1993                        MachinePointerInfo());
1994   }
1995 
1996   // Return the value of the return address register, marking it an implicit
1997   // live-in.
1998   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1999   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
2000 }
2001 
2002 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
2003                                                  SelectionDAG &DAG) const {
2004   SDLoc DL(Op);
2005   SDValue Lo = Op.getOperand(0);
2006   SDValue Hi = Op.getOperand(1);
2007   SDValue Shamt = Op.getOperand(2);
2008   EVT VT = Lo.getValueType();
2009 
2010   // if Shamt-XLEN < 0: // Shamt < XLEN
2011   //   Lo = Lo << Shamt
2012   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
2013   // else:
2014   //   Lo = 0
2015   //   Hi = Lo << (Shamt-XLEN)
2016 
2017   SDValue Zero = DAG.getConstant(0, DL, VT);
2018   SDValue One = DAG.getConstant(1, DL, VT);
2019   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2020   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2021   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2022   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2023 
2024   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2025   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2026   SDValue ShiftRightLo =
2027       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
2028   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2029   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2030   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
2031 
2032   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2033 
2034   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2035   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2036 
2037   SDValue Parts[2] = {Lo, Hi};
2038   return DAG.getMergeValues(Parts, DL);
2039 }
2040 
/// Lower ISD::SRA_PARTS/SRL_PARTS: a double-XLEN shift-right of the {Lo, Hi}
/// pair by a runtime amount. \p IsSRA selects arithmetic vs logical semantics
/// for the high word.
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  // Shamt - XLEN; negative exactly when Shamt < XLEN (the in-word case).
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  // (Hi << 1) << (XLEN-1 - Shamt): the fixed one-bit pre-shift keeps each
  // dynamic shift amount within [0, XLEN-1].
  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  // Cross-word case: Hi's bits land entirely in Lo; the new Hi is either the
  // replicated sign bit (SRA) or zero (SRL).
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
2092 
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32). The two operands are the low and high
// halves of the i64 splat value.
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    // (LoC >> 31) arithmetically replicates Lo's sign bit into all 32 bits,
    // so the check holds exactly when {Hi,Lo} is a sign-extended i32 value.
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  // vmv.v.x vX, hi
  // vsll.vx vX, vX, /*32*/
  // vmv.v.x vY, lo
  // vsll.vx vY, vY, /*32*/
  // vsrl.vx vY, vY, /*32*/
  // vor.vv vX, vX, vY
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);

  // Zero-extend Lo into the low 32 bits of each e64 element by shifting left
  // then logically right by 32.
  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  // With a zero high half the zero-extended Lo already is the full splat.
  if (isNullConstant(Hi))
    return Lo;

  // Place Hi in the upper 32 bits and combine with OR.
  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}
2139 
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
// \p ExtTrueVal is the scalar value selected for true lanes (1 or -1).
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  MVT XLenVT = Subtarget.getXLenVT();
  // Scalar splat operands for the selected true/false lane values.
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length path: operate on the scalable container type and use the
  // explicitly-VL-predicated select node.
  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  // The mask operand needs the i1 container with the same element count.
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Splat the scalar true/false values with the VL-predicated vmv.v.x node.
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
2194 
2195 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2196     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2197   MVT ExtVT = Op.getSimpleValueType();
2198   // Only custom-lower extensions from fixed-length vector types.
2199   if (!ExtVT.isFixedLengthVector())
2200     return Op;
2201   MVT VT = Op.getOperand(0).getSimpleValueType();
2202   // Grab the canonical container type for the extended type. Infer the smaller
2203   // type from that to ensure the same number of vector elements, as we know
2204   // the LMUL will be sufficient to hold the smaller type.
2205   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2206   // Get the extended container type manually to ensure the same number of
2207   // vector elements between source and dest.
2208   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
2209                                      ContainerExtVT.getVectorElementCount());
2210 
2211   SDValue Op1 =
2212       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2213 
2214   SDLoc DL(Op);
2215   SDValue Mask, VL;
2216   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2217 
2218   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
2219 
2220   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
2221 }
2222 
2223 // Custom-lower truncations from vectors to mask vectors by using a mask and a
2224 // setcc operation:
2225 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
2226 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
2227                                                   SelectionDAG &DAG) const {
2228   SDLoc DL(Op);
2229   EVT MaskVT = Op.getValueType();
2230   // Only expect to custom-lower truncations to mask types
2231   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
2232          "Unexpected type for vector mask lowering");
2233   SDValue Src = Op.getOperand(0);
2234   MVT VecVT = Src.getSimpleValueType();
2235 
2236   // If this is a fixed vector, we need to convert it to a scalable vector.
2237   MVT ContainerVT = VecVT;
2238   if (VecVT.isFixedLengthVector()) {
2239     ContainerVT = getContainerForFixedLengthVector(VecVT);
2240     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2241   }
2242 
2243   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
2244   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2245 
2246   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
2247   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
2248 
2249   if (VecVT.isScalableVector()) {
2250     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
2251     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
2252   }
2253 
2254   SDValue Mask, VL;
2255   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2256 
2257   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2258   SDValue Trunc =
2259       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
2260   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
2261                       DAG.getCondCode(ISD::SETNE), Mask, VL);
2262   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
2263 }
2264 
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // An i64 scalar doesn't fit in a single GPR on RV32, so it can't be moved
  // into a vector element directly; that case takes the split lo/hi path
  // below unless the constant special-case applies.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // A vector holding the value at element 0, to be slid into position below.
  SDValue ValInVec;

  if (IsLegalInsert) {
    // Inserting at element 0 needs no slide; merge the scalar directly into
    // the source vector at position 0.
    if (isNullConstant(Idx))
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
    ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    // Twice the element count at half the element width: same size in bits.
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // Use VL = Idx + 1 so the slide writes exactly up to and including the
  // destination element; the tail beyond VL stays undisturbed.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  // Convert back to the original fixed-length type.
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
2348 
2349 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2350 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2351 // types this is done using VMV_X_S to allow us to glean information about the
2352 // sign bits of the result.
2353 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2354                                                      SelectionDAG &DAG) const {
2355   SDLoc DL(Op);
2356   SDValue Idx = Op.getOperand(1);
2357   SDValue Vec = Op.getOperand(0);
2358   EVT EltVT = Op.getValueType();
2359   MVT VecVT = Vec.getSimpleValueType();
2360   MVT XLenVT = Subtarget.getXLenVT();
2361 
2362   if (VecVT.getVectorElementType() == MVT::i1) {
2363     // FIXME: For now we just promote to an i8 vector and extract from that,
2364     // but this is probably not optimal.
2365     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
2366     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
2367     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
2368   }
2369 
2370   // If this is a fixed vector, we need to convert it to a scalable vector.
2371   MVT ContainerVT = VecVT;
2372   if (VecVT.isFixedLengthVector()) {
2373     ContainerVT = getContainerForFixedLengthVector(VecVT);
2374     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2375   }
2376 
2377   // If the index is 0, the vector is already in the right position.
2378   if (!isNullConstant(Idx)) {
2379     // Use a VL of 1 to avoid processing more elements than we need.
2380     SDValue VL = DAG.getConstant(1, DL, XLenVT);
2381     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2382     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2383     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2384                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2385   }
2386 
2387   if (!EltVT.isInteger()) {
2388     // Floating-point extracts are handled in TableGen.
2389     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2390                        DAG.getConstant(0, DL, XLenVT));
2391   }
2392 
2393   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2394   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2395 }
2396 
2397 // Called by type legalization to handle splat of i64 on RV32.
2398 // FIXME: We can optimize this when the type has sign or zero bits in one
2399 // of the halves.
2400 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2401                                    SDValue VL, SelectionDAG &DAG) {
2402   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
2403   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2404                            DAG.getConstant(0, DL, MVT::i32));
2405   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2406                            DAG.getConstant(1, DL, MVT::i32));
2407 
2408   // vmv.v.x vX, hi
2409   // vsll.vx vX, vX, /*32*/
2410   // vmv.v.x vY, lo
2411   // vsll.vx vY, vY, /*32*/
2412   // vsrl.vx vY, vY, /*32*/
2413   // vor.vv vX, vX, vY
2414   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
2415   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2416   Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2417   Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2418   Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2419 
2420   Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
2421   Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
2422 
2423   return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
2424 }
2425 
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  // Chained intrinsics carry the chain in operand 0, shifting the intrinsic
  // ID and every other operand index by one.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  // Look up which operand (if any) of this intrinsic is a scalar that is
  // splatted across a vector; a SplatOperand of 0 means "none".
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  // Copy the operand list; ScalarOp aliases the splat slot so it can be
  // rewritten in place before rebuilding the node.
  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // The more complex case is when the scalar is larger than XLenVT: only
  // i64 on RV32 is expected here.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
2493 
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer is register tp (x4).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    SDValue Scalar = Op.getOperand(1);
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      // Sign extend constants so the simm5 check in isel can still match a
      // .vi form; an any-extend would become a zero extend and defeat it.
      unsigned ExtOpc =
          isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
      Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
                         Op.getOperand(2));
    }

    // Scalar wider than XLEN: only i64 on RV32 is expected.
    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // If this is a sign-extended 32-bit constant, we can truncate it and rely
    // on the instruction to sign-extend since SEW>XLEN.
    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
      if (isInt<32>(CVal->getSExtValue()))
        return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                           DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
                           Op.getOperand(2));
    }

    // Otherwise use the more complicated splatting algorithm.
    return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
                               Op.getOperand(2), DAG);
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   vmv.v.x vX, hi
    //   vsll.vx vX, vX, /*32*/
    //   vmv.v.x vY, lo
    //   vsll.vx vY, vY, /*32*/
    //   vsrl.vx vY, vY, /*32*/
    //   vor.vv vX, vX, vY
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = Op.getOperand(3);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
    SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                      DAG.getConstant(0, DL, MVT::i32), VL);

    // Build a mask that is true only for element 0, then select between the
    // splatted value and the original source under that mask.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
                    DAG.getCondCode(ISD::SETEQ), Mask, VL);
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  }

  // Fall through to the generic handling of scalar splat operands.
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
2589 
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Chained intrinsics only need the generic splat-operand legalization.
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
2594 
2595 static MVT getLMUL1VT(MVT VT) {
2596   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
2597          "Unexpected vector MVT");
2598   return MVT::getScalableVectorVT(
2599       VT.getVectorElementType(),
2600       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
2601 }
2602 
2603 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
2604   switch (ISDOpcode) {
2605   default:
2606     llvm_unreachable("Unhandled reduction");
2607   case ISD::VECREDUCE_ADD:
2608     return RISCVISD::VECREDUCE_ADD_VL;
2609   case ISD::VECREDUCE_UMAX:
2610     return RISCVISD::VECREDUCE_UMAX_VL;
2611   case ISD::VECREDUCE_SMAX:
2612     return RISCVISD::VECREDUCE_SMAX_VL;
2613   case ISD::VECREDUCE_UMIN:
2614     return RISCVISD::VECREDUCE_UMIN_VL;
2615   case ISD::VECREDUCE_SMIN:
2616     return RISCVISD::VECREDUCE_SMIN_VL;
2617   case ISD::VECREDUCE_AND:
2618     return RISCVISD::VECREDUCE_AND_VL;
2619   case ISD::VECREDUCE_OR:
2620     return RISCVISD::VECREDUCE_OR_VL;
2621   case ISD::VECREDUCE_XOR:
2622     return RISCVISD::VECREDUCE_XOR_VL;
2623   }
2624 }
2625 
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  // The scalar opcode (e.g. ISD::ADD) corresponding to this reduction; used
  // both to combine split halves and to query the neutral element below.
  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    // Reducing over (Lo op Hi) gives the same result: these integer
    // reductions are associative and commutative.
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  // If this is a fixed vector, operate on its scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // The reduction node produces its result in an LMUL=1 vector type.
  MVT M1VT = getLMUL1VT(ContainerVT);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
  // Extract the scalar result from element 0 and sign-extend/truncate it to
  // the expected result type.
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
2675 
2676 // Given a reduction op, this function returns the matching reduction opcode,
2677 // the vector SDValue and the scalar SDValue required to lower this to a
2678 // RISCVISD node.
2679 static std::tuple<unsigned, SDValue, SDValue>
2680 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
2681   SDLoc DL(Op);
2682   switch (Op.getOpcode()) {
2683   default:
2684     llvm_unreachable("Unhandled reduction");
2685   case ISD::VECREDUCE_FADD:
2686     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
2687                            DAG.getConstantFP(0.0, DL, EltVT));
2688   case ISD::VECREDUCE_SEQ_FADD:
2689     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
2690                            Op.getOperand(0));
2691   }
2692 }
2693 
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  // Pick the RISCVISD opcode, input vector and scalar start value for this
  // FP reduction; ordered and unordered FADD differ in operand layout.
  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  // If the input is a fixed vector, operate on its scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  // The reduction node produces its result in an LMUL=1 vector type.
  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
  // The scalar result lives in element 0 of the reduction result.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
2724 
// Lower INSERT_SUBVECTOR. Three strategies are used depending on the types:
// bitcasting mask vectors to i8, a full-length vslideup for fixed-length
// subvectors, and subregister manipulation (plus an LMUL=1 vslideup where
// needed) for scalable subvectors.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors (and the insert index) in terms of i8
      // elements: 8 mask bits per i8 element.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back to a mask by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // Inserting at index 0 over an undef vector is a no-op.
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // Place the subvector at element 0 of an undef container, then slide it
    // up into position.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (!VecVT.isFixedLengthVector())
      return Slideup;
    return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
  }

  // Scalable-into-scalable insert: split the index into a subregister index
  // and a remainder within that subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // Fractional-LMUL subvectors occupy only part of a vector register.
  RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  // Move the subvector to element 0 of the LMUL=1 type before sliding up.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
2874 
// Lower ISD::EXTRACT_SUBVECTOR. There are three main strategies, in order:
// 1. i1 (mask) subvectors are bitcast to equivalent i8 vectors (or, failing
//    that, zero-extended, extracted, and compared back down to i1).
// 2. Fixed-length subvectors are extracted by sliding the source vector group
//    down by the full index and taking a cast-like extract at element 0.
// 3. Scalable subvectors are decomposed into a subregister index plus a
//    remainder; a VSLIDEDOWN handles any non-zero remainder.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret every group of 8 mask bits as one i8 element; the index
      // and both types are scaled down by 8 to match.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Compare the extracted i8 elements against zero to recover the mask.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // NOTE(review): VecVT may still be scalable here (fixed-length extract
    // from a scalable vector); confirm getDefaultVLOps supports that case.
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                       DAG.getConstant(0, DL, XLenVT));
  }

  // Decompose the index into a register-aligned part (SubRegIdx) and a
  // remaining element offset within that register (RemIdx).
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
2997 
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // Compute an upper bound on VLMAX when the maximum vector register size is
  // known; 0 means "unknown".
  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  // Indices are computed in an integer vector of the same element count.
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  unsigned MinElts = VecVT.getVectorMinNumElements();
  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                              DAG.getConstant(MinElts, DL, XLenVT));
  SDValue VLMinus1 =
      DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);

  // Indices = (VLMAX-1) - vid, i.e. the reversed element indices.
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices =
      DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
3075 
3076 SDValue
3077 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
3078                                                      SelectionDAG &DAG) const {
3079   auto *Load = cast<LoadSDNode>(Op);
3080 
3081   SDLoc DL(Op);
3082   MVT VT = Op.getSimpleValueType();
3083   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3084 
3085   SDValue VL =
3086       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3087 
3088   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3089   SDValue NewLoad = DAG.getMemIntrinsicNode(
3090       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
3091       Load->getMemoryVT(), Load->getMemOperand());
3092 
3093   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
3094   return DAG.getMergeValues({Result, Load->getChain()}, DL);
3095 }
3096 
3097 SDValue
3098 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
3099                                                       SelectionDAG &DAG) const {
3100   auto *Store = cast<StoreSDNode>(Op);
3101 
3102   SDLoc DL(Op);
3103   MVT VT = Store->getValue().getSimpleValueType();
3104 
3105   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
3106   // This is tricky to do.
3107 
3108   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3109 
3110   SDValue VL =
3111       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3112 
3113   SDValue NewValue =
3114       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
3115   return DAG.getMemIntrinsicNode(
3116       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
3117       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
3118       Store->getMemoryVT(), Store->getMemOperand());
3119 }
3120 
// Lower a fixed-length vector SETCC to a VL-predicated SETCC_VL. Integer
// comparisons map directly; floating-point condition codes that have no
// native form are rewritten via operand swaps, result inversion, and/or a
// combination of two compares joined by a mask logic op.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(InVT);

  MVT VT = Op.getSimpleValueType();

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // Invert: XOR the final mask with all-ones.
  // LogicOpc: if set, emit two compares and combine them with this opcode.
  bool Invert = false;
  Optional<unsigned> LogicOpc;
  if (ContainerVT.isFloatingPoint()) {
    bool Swap = false;
    switch (CC) {
    default:
      break;
    case ISD::SETULE:
    case ISD::SETULT:
      Swap = true;
      LLVM_FALLTHROUGH;
    case ISD::SETUGE:
    case ISD::SETUGT:
      // Unsigned-ordered compares: use the inverse (ordered) condition and
      // invert the result (ULE/ULT additionally swap operands first).
      CC = getSetCCInverse(CC, ContainerVT);
      Invert = true;
      break;
    case ISD::SETOGE:
    case ISD::SETOGT:
    case ISD::SETGE:
    case ISD::SETGT:
      // Only the LT/LE forms exist natively; swap operands to use them.
      Swap = true;
      break;
    case ISD::SETUEQ:
      // Use !((OLT Op1, Op2) || (OLT Op2, Op1))
      Invert = true;
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETONE:
      // Use ((OLT Op1, Op2) || (OLT Op2, Op1))
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETOLT;
      break;
    case ISD::SETO:
      // Use (OEQ Op1, Op1) && (OEQ Op2, Op2)
      LogicOpc = RISCVISD::VMAND_VL;
      CC = ISD::SETOEQ;
      break;
    case ISD::SETUO:
      // Use (UNE Op1, Op1) || (UNE Op2, Op2)
      LogicOpc = RISCVISD::VMOR_VL;
      CC = ISD::SETUNE;
      break;
    }

    if (Swap) {
      CC = getSetCCSwappedOperands(CC);
      std::swap(Op1, Op2);
    }
  }

  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

  // There are 3 cases we need to emit.
  // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2)
  //    we need to compare each operand with itself.
  // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in
  //    both orders.
  // 3. For any other case we just need one compare with Op1 and Op2.
  SDValue Cmp;
  if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1,
                      DAG.getCondCode(CC), Mask, VL);
    SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2,
                               DAG.getCondCode(CC), Mask, VL);
    Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
  } else {
    Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
                      DAG.getCondCode(CC), Mask, VL);
    if (LogicOpc) {
      SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1,
                                 DAG.getCondCode(CC), Mask, VL);
      Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
    }
  }

  if (Invert) {
    // Invert the mask by XORing with an all-ones mask.
    SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL);
  }

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
3224 
3225 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
3226     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
3227   MVT VT = Op.getSimpleValueType();
3228 
3229   if (VT.getVectorElementType() == MVT::i1)
3230     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
3231 
3232   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
3233 }
3234 
3235 // Lower vector ABS to smax(X, sub(0, X)).
3236 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
3237   SDLoc DL(Op);
3238   MVT VT = Op.getSimpleValueType();
3239   SDValue X = Op.getOperand(0);
3240 
3241   assert(VT.isFixedLengthVector() && "Unexpected type");
3242 
3243   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3244   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
3245 
3246   SDValue Mask, VL;
3247   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3248 
3249   SDValue SplatZero =
3250       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3251                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3252   SDValue NegX =
3253       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
3254   SDValue Max =
3255       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
3256 
3257   return convertFromScalableVector(VT, Max, DAG, Subtarget);
3258 }
3259 
3260 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
3261     SDValue Op, SelectionDAG &DAG) const {
3262   SDLoc DL(Op);
3263   MVT VT = Op.getSimpleValueType();
3264   SDValue Mag = Op.getOperand(0);
3265   SDValue Sign = Op.getOperand(1);
3266   assert(Mag.getValueType() == Sign.getValueType() &&
3267          "Can only handle COPYSIGN with matching types.");
3268 
3269   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3270   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
3271   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
3272 
3273   SDValue Mask, VL;
3274   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3275 
3276   SDValue CopySign =
3277       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
3278 
3279   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
3280 }
3281 
3282 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
3283     SDValue Op, SelectionDAG &DAG) const {
3284   MVT VT = Op.getSimpleValueType();
3285   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3286 
3287   MVT I1ContainerVT =
3288       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3289 
3290   SDValue CC =
3291       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
3292   SDValue Op1 =
3293       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3294   SDValue Op2 =
3295       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
3296 
3297   SDLoc DL(Op);
3298   SDValue Mask, VL;
3299   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3300 
3301   SDValue Select =
3302       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3303 
3304   return convertFromScalableVector(VT, Select, DAG, Subtarget);
3305 }
3306 
3307 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3308                                                unsigned NewOpc,
3309                                                bool HasMask) const {
3310   MVT VT = Op.getSimpleValueType();
3311   assert(useRVVForFixedLengthVectorVT(VT) &&
3312          "Only expected to lower fixed length vector operation!");
3313   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3314 
3315   // Create list of operands by converting existing ones to scalable types.
3316   SmallVector<SDValue, 6> Ops;
3317   for (const SDValue &V : Op->op_values()) {
3318     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3319 
3320     // Pass through non-vector operands.
3321     if (!V.getValueType().isVector()) {
3322       Ops.push_back(V);
3323       continue;
3324     }
3325 
3326     // "cast" fixed length vector to a scalable vector.
3327     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3328            "Only fixed length vectors are supported!");
3329     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3330   }
3331 
3332   SDLoc DL(Op);
3333   SDValue Mask, VL;
3334   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3335   if (HasMask)
3336     Ops.push_back(Mask);
3337   Ops.push_back(VL);
3338 
3339   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3340   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3341 }
3342 
3343 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3344 // form of the given Opcode.
3345 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3346   switch (Opcode) {
3347   default:
3348     llvm_unreachable("Unexpected opcode");
3349   case ISD::SHL:
3350     return RISCVISD::SLLW;
3351   case ISD::SRA:
3352     return RISCVISD::SRAW;
3353   case ISD::SRL:
3354     return RISCVISD::SRLW;
3355   case ISD::SDIV:
3356     return RISCVISD::DIVW;
3357   case ISD::UDIV:
3358     return RISCVISD::DIVUW;
3359   case ISD::UREM:
3360     return RISCVISD::REMUW;
3361   case ISD::ROTL:
3362     return RISCVISD::ROLW;
3363   case ISD::ROTR:
3364     return RISCVISD::RORW;
3365   case RISCVISD::GREVI:
3366     return RISCVISD::GREVIW;
3367   case RISCVISD::GORCI:
3368     return RISCVISD::GORCIW;
3369   }
3370 }
3371 
3372 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3373 // Because i32 isn't a legal type for RV64, these operations would otherwise
3374 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3375 // later one because the fact the operation was originally of type i32 is
3376 // lost.
3377 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3378                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3379   SDLoc DL(N);
3380   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3381   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3382   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3383   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3384   // ReplaceNodeResults requires we maintain the same type for the return value.
3385   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3386 }
3387 
3388 // Converts the given 32-bit operation to a i64 operation with signed extension
3389 // semantic to reduce the signed extension instructions.
3390 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3391   SDLoc DL(N);
3392   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3393   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3394   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3395   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3396                                DAG.getValueType(MVT::i32));
3397   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3398 }
3399 
3400 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
3401                                              SmallVectorImpl<SDValue> &Results,
3402                                              SelectionDAG &DAG) const {
3403   SDLoc DL(N);
3404   switch (N->getOpcode()) {
3405   default:
3406     llvm_unreachable("Don't know how to custom type legalize this operation!");
3407   case ISD::STRICT_FP_TO_SINT:
3408   case ISD::STRICT_FP_TO_UINT:
3409   case ISD::FP_TO_SINT:
3410   case ISD::FP_TO_UINT: {
3411     bool IsStrict = N->isStrictFPOpcode();
3412     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3413            "Unexpected custom legalisation");
3414     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
3415     // If the FP type needs to be softened, emit a library call using the 'si'
3416     // version. If we left it to default legalization we'd end up with 'di'. If
3417     // the FP type doesn't need to be softened just let generic type
3418     // legalization promote the result type.
3419     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
3420         TargetLowering::TypeSoftenFloat)
3421       return;
3422     RTLIB::Libcall LC;
3423     if (N->getOpcode() == ISD::FP_TO_SINT ||
3424         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
3425       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
3426     else
3427       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
3428     MakeLibCallOptions CallOptions;
3429     EVT OpVT = Op0.getValueType();
3430     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
3431     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
3432     SDValue Result;
3433     std::tie(Result, Chain) =
3434         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
3435     Results.push_back(Result);
3436     if (IsStrict)
3437       Results.push_back(Chain);
3438     break;
3439   }
3440   case ISD::READCYCLECOUNTER: {
3441     assert(!Subtarget.is64Bit() &&
3442            "READCYCLECOUNTER only has custom type legalization on riscv32");
3443 
3444     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
3445     SDValue RCW =
3446         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
3447 
3448     Results.push_back(
3449         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
3450     Results.push_back(RCW.getValue(2));
3451     break;
3452   }
3453   case ISD::ADD:
3454   case ISD::SUB:
3455   case ISD::MUL:
3456     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3457            "Unexpected custom legalisation");
3458     if (N->getOperand(1).getOpcode() == ISD::Constant)
3459       return;
3460     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
3461     break;
3462   case ISD::SHL:
3463   case ISD::SRA:
3464   case ISD::SRL:
3465     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3466            "Unexpected custom legalisation");
3467     if (N->getOperand(1).getOpcode() == ISD::Constant)
3468       return;
3469     Results.push_back(customLegalizeToWOp(N, DAG));
3470     break;
3471   case ISD::ROTL:
3472   case ISD::ROTR:
3473     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3474            "Unexpected custom legalisation");
3475     Results.push_back(customLegalizeToWOp(N, DAG));
3476     break;
3477   case ISD::SDIV:
3478   case ISD::UDIV:
3479   case ISD::UREM: {
3480     MVT VT = N->getSimpleValueType(0);
3481     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
3482            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
3483            "Unexpected custom legalisation");
3484     if (N->getOperand(0).getOpcode() == ISD::Constant ||
3485         N->getOperand(1).getOpcode() == ISD::Constant)
3486       return;
3487 
3488     // If the input is i32, use ANY_EXTEND since the W instructions don't read
3489     // the upper 32 bits. For other types we need to sign or zero extend
3490     // based on the opcode.
3491     unsigned ExtOpc = ISD::ANY_EXTEND;
3492     if (VT != MVT::i32)
3493       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
3494                                            : ISD::ZERO_EXTEND;
3495 
3496     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
3497     break;
3498   }
3499   case ISD::UADDO:
3500   case ISD::USUBO: {
3501     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3502            "Unexpected custom legalisation");
3503     bool IsAdd = N->getOpcode() == ISD::UADDO;
3504     SDLoc DL(N);
3505     // Create an ADDW or SUBW.
3506     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3507     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3508     SDValue Res =
3509         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
3510     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
3511                       DAG.getValueType(MVT::i32));
3512 
3513     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
3514     // Since the inputs are sign extended from i32, this is equivalent to
3515     // comparing the lower 32 bits.
3516     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
3517     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
3518                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
3519 
3520     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
3521     Results.push_back(Overflow);
3522     return;
3523   }
3524   case ISD::BITCAST: {
3525     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3526              Subtarget.hasStdExtF()) ||
3527             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
3528            "Unexpected custom legalisation");
3529     SDValue Op0 = N->getOperand(0);
3530     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
3531       if (Op0.getValueType() != MVT::f16)
3532         return;
3533       SDValue FPConv =
3534           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
3535       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
3536     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3537                Subtarget.hasStdExtF()) {
3538       if (Op0.getValueType() != MVT::f32)
3539         return;
3540       SDValue FPConv =
3541           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
3542       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
3543     }
3544     break;
3545   }
3546   case RISCVISD::GREVI:
3547   case RISCVISD::GORCI: {
3548     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3549            "Unexpected custom legalisation");
3550     // This is similar to customLegalizeToWOp, except that we pass the second
3551     // operand (a TargetConstant) straight through: it is already of type
3552     // XLenVT.
3553     SDLoc DL(N);
3554     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3555     SDValue NewOp0 =
3556         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3557     SDValue NewRes =
3558         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
3559     // ReplaceNodeResults requires we maintain the same type for the return
3560     // value.
3561     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
3562     break;
3563   }
3564   case RISCVISD::SHFLI: {
3565     // There is no SHFLIW instruction, but we can just promote the operation.
3566     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3567            "Unexpected custom legalisation");
3568     SDLoc DL(N);
3569     SDValue NewOp0 =
3570         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3571     SDValue NewRes =
3572         DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
3573     // ReplaceNodeResults requires we maintain the same type for the return
3574     // value.
3575     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
3576     break;
3577   }
3578   case ISD::BSWAP:
3579   case ISD::BITREVERSE: {
3580     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3581            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
3582     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3583                                  N->getOperand(0));
3584     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
3585     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
3586                                  DAG.getTargetConstant(Imm, DL,
3587                                                        Subtarget.getXLenVT()));
3588     // ReplaceNodeResults requires we maintain the same type for the return
3589     // value.
3590     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
3591     break;
3592   }
3593   case ISD::FSHL:
3594   case ISD::FSHR: {
3595     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3596            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
3597     SDValue NewOp0 =
3598         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3599     SDValue NewOp1 =
3600         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3601     SDValue NewOp2 =
3602         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
3603     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
3604     // Mask the shift amount to 5 bits.
3605     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
3606                          DAG.getConstant(0x1f, DL, MVT::i64));
3607     unsigned Opc =
3608         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
3609     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
3610     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
3611     break;
3612   }
3613   case ISD::EXTRACT_VECTOR_ELT: {
3614     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
3615     // type is illegal (currently only vXi64 RV32).
3616     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
3617     // transferred to the destination register. We issue two of these from the
3618     // upper- and lower- halves of the SEW-bit vector element, slid down to the
3619     // first element.
3620     SDLoc DL(N);
3621     SDValue Vec = N->getOperand(0);
3622     SDValue Idx = N->getOperand(1);
3623 
3624     // The vector type hasn't been legalized yet so we can't issue target
3625     // specific nodes if it needs legalization.
3626     // FIXME: We would manually legalize if it's important.
3627     if (!isTypeLegal(Vec.getValueType()))
3628       return;
3629 
3630     MVT VecVT = Vec.getSimpleValueType();
3631 
3632     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
3633            VecVT.getVectorElementType() == MVT::i64 &&
3634            "Unexpected EXTRACT_VECTOR_ELT legalization");
3635 
3636     // If this is a fixed vector, we need to convert it to a scalable vector.
3637     MVT ContainerVT = VecVT;
3638     if (VecVT.isFixedLengthVector()) {
3639       ContainerVT = getContainerForFixedLengthVector(VecVT);
3640       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3641     }
3642 
3643     MVT XLenVT = Subtarget.getXLenVT();
3644 
3645     // Use a VL of 1 to avoid processing more elements than we need.
3646     MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
3647     SDValue VL = DAG.getConstant(1, DL, XLenVT);
3648     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3649 
3650     // Unless the index is known to be 0, we must slide the vector down to get
3651     // the desired element into index 0.
3652     if (!isNullConstant(Idx)) {
3653       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3654                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3655     }
3656 
3657     // Extract the lower XLEN bits of the correct vector element.
3658     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3659 
3660     // To extract the upper XLEN bits of the vector element, shift the first
3661     // element right by 32 bits and re-extract the lower XLEN bits.
3662     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3663                                      DAG.getConstant(32, DL, XLenVT), VL);
3664     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
3665                                  ThirtyTwoV, Mask, VL);
3666 
3667     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
3668 
3669     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
3670     break;
3671   }
3672   case ISD::INTRINSIC_WO_CHAIN: {
3673     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3674     switch (IntNo) {
3675     default:
3676       llvm_unreachable(
3677           "Don't know how to custom type legalize this intrinsic!");
3678     case Intrinsic::riscv_vmv_x_s: {
3679       EVT VT = N->getValueType(0);
3680       MVT XLenVT = Subtarget.getXLenVT();
3681       if (VT.bitsLT(XLenVT)) {
3682         // Simple case just extract using vmv.x.s and truncate.
3683         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
3684                                       Subtarget.getXLenVT(), N->getOperand(1));
3685         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
3686         return;
3687       }
3688 
3689       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
3690              "Unexpected custom legalization");
3691 
3692       // We need to do the move in two steps.
3693       SDValue Vec = N->getOperand(1);
3694       MVT VecVT = Vec.getSimpleValueType();
3695 
3696       // First extract the lower XLEN bits of the element.
3697       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3698 
3699       // To extract the upper XLEN bits of the vector element, shift the first
3700       // element right by 32 bits and re-extract the lower XLEN bits.
3701       SDValue VL = DAG.getConstant(1, DL, XLenVT);
3702       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
3703       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3704       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
3705                                        DAG.getConstant(32, DL, XLenVT), VL);
3706       SDValue LShr32 =
3707           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
3708       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
3709 
3710       Results.push_back(
3711           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
3712       break;
3713     }
3714     }
3715     break;
3716   }
3717   case ISD::VECREDUCE_ADD:
3718   case ISD::VECREDUCE_AND:
3719   case ISD::VECREDUCE_OR:
3720   case ISD::VECREDUCE_XOR:
3721   case ISD::VECREDUCE_SMAX:
3722   case ISD::VECREDUCE_UMAX:
3723   case ISD::VECREDUCE_SMIN:
3724   case ISD::VECREDUCE_UMIN:
3725     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
3726       Results.push_back(V);
3727     break;
3728   }
3729 }
3730 
3731 // A structure to hold one of the bit-manipulation patterns below. Together, a
3732 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
3733 //   (or (and (shl x, 1), 0xAAAAAAAA),
3734 //       (and (srl x, 1), 0x55555555))
3735 struct RISCVBitmanipPat {
3736   SDValue Op;
3737   unsigned ShAmt;
3738   bool IsSHL;
3739 
3740   bool formsPairWith(const RISCVBitmanipPat &Other) const {
3741     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
3742   }
3743 };
3744 
3745 // Matches patterns of the form
3746 //   (and (shl x, C2), (C1 << C2))
3747 //   (and (srl x, C2), C1)
3748 //   (shl (and x, C1), C2)
3749 //   (srl (and x, (C1 << C2)), C2)
3750 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
3751 // The expected masks for each shift amount are specified in BitmanipMasks where
3752 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
3753 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
3754 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
3755 // XLen is 64.
3756 static Optional<RISCVBitmanipPat>
3757 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
3758   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
3759          "Unexpected number of masks");
3760   Optional<uint64_t> Mask;
3761   // Optionally consume a mask around the shift operation.
3762   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
3763     Mask = Op.getConstantOperandVal(1);
3764     Op = Op.getOperand(0);
3765   }
3766   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
3767     return None;
3768   bool IsSHL = Op.getOpcode() == ISD::SHL;
3769 
3770   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3771     return None;
3772   uint64_t ShAmt = Op.getConstantOperandVal(1);
3773 
3774   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
3775   if (ShAmt >= Width && !isPowerOf2_64(ShAmt))
3776     return None;
3777   // If we don't have enough masks for 64 bit, then we must be trying to
3778   // match SHFL so we're only allowed to shift 1/4 of the width.
3779   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
3780     return None;
3781 
3782   SDValue Src = Op.getOperand(0);
3783 
3784   // The expected mask is shifted left when the AND is found around SHL
3785   // patterns.
3786   //   ((x >> 1) & 0x55555555)
3787   //   ((x << 1) & 0xAAAAAAAA)
3788   bool SHLExpMask = IsSHL;
3789 
3790   if (!Mask) {
3791     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
3792     // the mask is all ones: consume that now.
3793     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
3794       Mask = Src.getConstantOperandVal(1);
3795       Src = Src.getOperand(0);
3796       // The expected mask is now in fact shifted left for SRL, so reverse the
3797       // decision.
3798       //   ((x & 0xAAAAAAAA) >> 1)
3799       //   ((x & 0x55555555) << 1)
3800       SHLExpMask = !SHLExpMask;
3801     } else {
3802       // Use a default shifted mask of all-ones if there's no AND, truncated
3803       // down to the expected width. This simplifies the logic later on.
3804       Mask = maskTrailingOnes<uint64_t>(Width);
3805       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
3806     }
3807   }
3808 
3809   unsigned MaskIdx = Log2_32(ShAmt);
3810   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3811 
3812   if (SHLExpMask)
3813     ExpMask <<= ShAmt;
3814 
3815   if (Mask != ExpMask)
3816     return None;
3817 
3818   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
3819 }
3820 
3821 // Matches any of the following bit-manipulation patterns:
3822 //   (and (shl x, 1), (0x55555555 << 1))
3823 //   (and (srl x, 1), 0x55555555)
3824 //   (shl (and x, 0x55555555), 1)
3825 //   (srl (and x, (0x55555555 << 1)), 1)
3826 // where the shift amount and mask may vary thus:
3827 //   [1]  = 0x55555555 / 0xAAAAAAAA
3828 //   [2]  = 0x33333333 / 0xCCCCCCCC
3829 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
3830 //   [8]  = 0x00FF00FF / 0xFF00FF00
3831 //   [16] = 0x0000FFFF / 0xFFFFFFFF
3832 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
3833 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
3834   // These are the unshifted masks which we use to match bit-manipulation
3835   // patterns. They may be shifted left in certain circumstances.
3836   static const uint64_t BitmanipMasks[] = {
3837       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
3838       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
3839 
3840   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3841 }
3842 
3843 // Match the following pattern as a GREVI(W) operation
3844 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
3845 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
3846                                const RISCVSubtarget &Subtarget) {
3847   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
3848   EVT VT = Op.getValueType();
3849 
3850   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3851     auto LHS = matchGREVIPat(Op.getOperand(0));
3852     auto RHS = matchGREVIPat(Op.getOperand(1));
3853     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
3854       SDLoc DL(Op);
3855       return DAG.getNode(
3856           RISCVISD::GREVI, DL, VT, LHS->Op,
3857           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3858     }
3859   }
3860   return SDValue();
3861 }
3862 
// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4.  (or (rotl/rotr x, bitwidth/2), x)
// Returns the GORCI node on success, or an empty SDValue if no match.
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  // Only XLenVT, or i32 on RV64, are handled.
  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Helper for rules 1/2/4: match Reverse as either (GREVI X, pow2-shamt)
    // or a rotate of X by exactly half the bitwidth.
    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(
              RISCVISD::GORCI, DL, VT, X,
              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // Rule 3 needs an inner OR node.
    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchGREVIPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchGREVIPat(OrOp0);
    }
    auto RHS = matchGREVIPat(Op1);
    // The shift halves must pair up and the bare operand of the inner OR must
    // be the same source value x that the halves shift.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, VT, LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}
3929 
3930 // Matches any of the following bit-manipulation patterns:
3931 //   (and (shl x, 1), (0x22222222 << 1))
3932 //   (and (srl x, 1), 0x22222222)
3933 //   (shl (and x, 0x22222222), 1)
3934 //   (srl (and x, (0x22222222 << 1)), 1)
3935 // where the shift amount and mask may vary thus:
3936 //   [1]  = 0x22222222 / 0x44444444
3937 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
3938 //   [4]  = 0x00F000F0 / 0x0F000F00
3939 //   [8]  = 0x0000FF00 / 0x00FF0000
3940 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
3941 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
3942   // These are the unshifted masks which we use to match bit-manipulation
3943   // patterns. They may be shifted left in certain circumstances.
3944   static const uint64_t BitmanipMasks[] = {
3945       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
3946       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
3947 
3948   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3949 }
3950 
// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)) as a SHFLI
// operation. The three operands may appear in any order across the two
// commutable ORs. Returns the SHFLI node on success, or an empty SDValue.
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
    return SDValue();

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);

  // Or is commutable so canonicalize the second OR to the LHS.
  if (Op0.getOpcode() != ISD::OR)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() != ISD::OR)
    return SDValue();

  // We found an inner OR, so our operands are the operands of the inner OR
  // and the other operand of the outer OR.
  SDValue A = Op0.getOperand(0);
  SDValue B = Op0.getOperand(1);
  SDValue C = Op1;

  auto Match1 = matchSHFLPat(A);
  auto Match2 = matchSHFLPat(B);

  // If neither matched, we failed.
  if (!Match1 && !Match2)
    return SDValue();

  // We had at least one match. if one failed, try the remaining C operand.
  // The unmatched inner-OR operand becomes the candidate for the AND below.
  if (!Match1) {
    std::swap(A, C);
    Match1 = matchSHFLPat(A);
    if (!Match1)
      return SDValue();
  } else if (!Match2) {
    std::swap(B, C);
    Match2 = matchSHFLPat(B);
    if (!Match2)
      return SDValue();
  }
  assert(Match1 && Match2);

  // Make sure our matches pair up.
  if (!Match1->formsPairWith(*Match2))
    return SDValue();

  // All that remains is to make sure C is an AND with the same input, that
  // masks out the bits that are being shuffled.
  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
      C.getOperand(0) != Match1->Op)
    return SDValue();

  uint64_t Mask = C.getConstantOperandVal(1);

  // Expected stationary-bit masks for the AND operand, indexed by log2 of the
  // shift amount (the complement of the shuffled-bit masks in matchSHFLPat).
  static const uint64_t BitmanipMasks[] = {
      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
  };

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  unsigned MaskIdx = Log2_32(Match1->ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (Mask != ExpMask)
    return SDValue();

  SDLoc DL(Op);
  return DAG.getNode(
      RISCVISD::SHFLI, DL, VT, Match1->Op,
      DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
}
4025 
4026 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
4027 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
4028 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
4029 // not undo itself, but they are redundant.
4030 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
4031   unsigned ShAmt1 = N->getConstantOperandVal(1);
4032   SDValue Src = N->getOperand(0);
4033 
4034   if (Src.getOpcode() != N->getOpcode())
4035     return SDValue();
4036 
4037   unsigned ShAmt2 = Src.getConstantOperandVal(1);
4038   Src = Src.getOperand(0);
4039 
4040   unsigned CombinedShAmt;
4041   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
4042     CombinedShAmt = ShAmt1 | ShAmt2;
4043   else
4044     CombinedShAmt = ShAmt1 ^ ShAmt2;
4045 
4046   if (CombinedShAmt == 0)
4047     return Src;
4048 
4049   SDLoc DL(N);
4050   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
4051                      DAG.getTargetConstant(CombinedShAmt, DL,
4052                                            N->getOperand(1).getValueType()));
4053 }
4054 
4055 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
4056                                                DAGCombinerInfo &DCI) const {
4057   SelectionDAG &DAG = DCI.DAG;
4058 
4059   switch (N->getOpcode()) {
4060   default:
4061     break;
4062   case RISCVISD::SplitF64: {
4063     SDValue Op0 = N->getOperand(0);
4064     // If the input to SplitF64 is just BuildPairF64 then the operation is
4065     // redundant. Instead, use BuildPairF64's operands directly.
4066     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
4067       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
4068 
4069     SDLoc DL(N);
4070 
4071     // It's cheaper to materialise two 32-bit integers than to load a double
4072     // from the constant pool and transfer it to integer registers through the
4073     // stack.
4074     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
4075       APInt V = C->getValueAPF().bitcastToAPInt();
4076       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
4077       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
4078       return DCI.CombineTo(N, Lo, Hi);
4079     }
4080 
4081     // This is a target-specific version of a DAGCombine performed in
4082     // DAGCombiner::visitBITCAST. It performs the equivalent of:
4083     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
4084     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
4085     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
4086         !Op0.getNode()->hasOneUse())
4087       break;
4088     SDValue NewSplitF64 =
4089         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
4090                     Op0.getOperand(0));
4091     SDValue Lo = NewSplitF64.getValue(0);
4092     SDValue Hi = NewSplitF64.getValue(1);
4093     APInt SignBit = APInt::getSignMask(32);
4094     if (Op0.getOpcode() == ISD::FNEG) {
4095       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
4096                                   DAG.getConstant(SignBit, DL, MVT::i32));
4097       return DCI.CombineTo(N, Lo, NewHi);
4098     }
4099     assert(Op0.getOpcode() == ISD::FABS);
4100     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
4101                                 DAG.getConstant(~SignBit, DL, MVT::i32));
4102     return DCI.CombineTo(N, Lo, NewHi);
4103   }
4104   case RISCVISD::SLLW:
4105   case RISCVISD::SRAW:
4106   case RISCVISD::SRLW:
4107   case RISCVISD::ROLW:
4108   case RISCVISD::RORW: {
4109     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
4110     SDValue LHS = N->getOperand(0);
4111     SDValue RHS = N->getOperand(1);
4112     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
4113     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
4114     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
4115         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
4116       if (N->getOpcode() != ISD::DELETED_NODE)
4117         DCI.AddToWorklist(N);
4118       return SDValue(N, 0);
4119     }
4120     break;
4121   }
4122   case RISCVISD::FSL:
4123   case RISCVISD::FSR: {
4124     // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read.
4125     SDValue ShAmt = N->getOperand(2);
4126     unsigned BitWidth = ShAmt.getValueSizeInBits();
4127     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
4128     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
4129     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4130       if (N->getOpcode() != ISD::DELETED_NODE)
4131         DCI.AddToWorklist(N);
4132       return SDValue(N, 0);
4133     }
4134     break;
4135   }
4136   case RISCVISD::FSLW:
4137   case RISCVISD::FSRW: {
4138     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
4139     // read.
4140     SDValue Op0 = N->getOperand(0);
4141     SDValue Op1 = N->getOperand(1);
4142     SDValue ShAmt = N->getOperand(2);
4143     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4144     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
4145     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
4146         SimplifyDemandedBits(Op1, OpMask, DCI) ||
4147         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
4148       if (N->getOpcode() != ISD::DELETED_NODE)
4149         DCI.AddToWorklist(N);
4150       return SDValue(N, 0);
4151     }
4152     break;
4153   }
4154   case RISCVISD::GREVIW:
4155   case RISCVISD::GORCIW: {
4156     // Only the lower 32 bits of the first operand are read
4157     SDValue Op0 = N->getOperand(0);
4158     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
4159     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
4160       if (N->getOpcode() != ISD::DELETED_NODE)
4161         DCI.AddToWorklist(N);
4162       return SDValue(N, 0);
4163     }
4164 
4165     return combineGREVI_GORCI(N, DCI.DAG);
4166   }
4167   case RISCVISD::FMV_X_ANYEXTW_RV64: {
4168     SDLoc DL(N);
4169     SDValue Op0 = N->getOperand(0);
4170     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
4171     // conversion is unnecessary and can be replaced with an ANY_EXTEND
4172     // of the FMV_W_X_RV64 operand.
4173     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
4174       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
4175              "Unexpected value type!");
4176       return Op0.getOperand(0);
4177     }
4178 
4179     // This is a target-specific version of a DAGCombine performed in
4180     // DAGCombiner::visitBITCAST. It performs the equivalent of:
4181     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
4182     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
4183     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
4184         !Op0.getNode()->hasOneUse())
4185       break;
4186     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
4187                                  Op0.getOperand(0));
4188     APInt SignBit = APInt::getSignMask(32).sext(64);
4189     if (Op0.getOpcode() == ISD::FNEG)
4190       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
4191                          DAG.getConstant(SignBit, DL, MVT::i64));
4192 
4193     assert(Op0.getOpcode() == ISD::FABS);
4194     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
4195                        DAG.getConstant(~SignBit, DL, MVT::i64));
4196   }
4197   case RISCVISD::GREVI:
4198   case RISCVISD::GORCI:
4199     return combineGREVI_GORCI(N, DCI.DAG);
4200   case ISD::OR:
4201     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
4202       return GREV;
4203     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
4204       return GORC;
4205     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
4206       return SHFL;
4207     break;
4208   case RISCVISD::SELECT_CC: {
4209     // Transform
4210     SDValue LHS = N->getOperand(0);
4211     SDValue RHS = N->getOperand(1);
4212     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
4213     if (!ISD::isIntEqualitySetCC(CCVal))
4214       break;
4215 
4216     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
4217     //      (select_cc X, Y, lt, trueV, falseV)
4218     // Sometimes the setcc is introduced after select_cc has been formed.
4219     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
4220         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
4221       // If we're looking for eq 0 instead of ne 0, we need to invert the
4222       // condition.
4223       bool Invert = CCVal == ISD::SETEQ;
4224       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
4225       if (Invert)
4226         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4227 
4228       SDLoc DL(N);
4229       RHS = LHS.getOperand(1);
4230       LHS = LHS.getOperand(0);
4231       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4232 
4233       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4234       return DAG.getNode(
4235           RISCVISD::SELECT_CC, DL, N->getValueType(0),
4236           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4237     }
4238 
4239     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
4240     //      (select_cc X, Y, eq/ne, trueV, falseV)
4241     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
4242       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
4243                          {LHS.getOperand(0), LHS.getOperand(1),
4244                           N->getOperand(2), N->getOperand(3),
4245                           N->getOperand(4)});
4246     // (select_cc X, 1, setne, trueV, falseV) ->
4247     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
4248     // This can occur when legalizing some floating point comparisons.
4249     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4250     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4251       SDLoc DL(N);
4252       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4253       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4254       RHS = DAG.getConstant(0, DL, LHS.getValueType());
4255       return DAG.getNode(
4256           RISCVISD::SELECT_CC, DL, N->getValueType(0),
4257           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4258     }
4259 
4260     break;
4261   }
4262   case RISCVISD::BR_CC: {
4263     SDValue LHS = N->getOperand(1);
4264     SDValue RHS = N->getOperand(2);
4265     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
4266     if (!ISD::isIntEqualitySetCC(CCVal))
4267       break;
4268 
4269     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
4270     //      (br_cc X, Y, lt, dest)
4271     // Sometimes the setcc is introduced after br_cc has been formed.
4272     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
4273         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
4274       // If we're looking for eq 0 instead of ne 0, we need to invert the
4275       // condition.
4276       bool Invert = CCVal == ISD::SETEQ;
4277       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
4278       if (Invert)
4279         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4280 
4281       SDLoc DL(N);
4282       RHS = LHS.getOperand(1);
4283       LHS = LHS.getOperand(0);
4284       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4285 
4286       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4287                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
4288                          N->getOperand(4));
4289     }
4290 
4291     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
4292     //      (br_cc X, Y, eq/ne, trueV, falseV)
4293     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
4294       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
4295                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
4296                          N->getOperand(3), N->getOperand(4));
4297 
4298     // (br_cc X, 1, setne, br_cc) ->
4299     // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
4300     // This can occur when legalizing some floating point comparisons.
4301     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4302     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
4303       SDLoc DL(N);
4304       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
4305       SDValue TargetCC = DAG.getCondCode(CCVal);
4306       RHS = DAG.getConstant(0, DL, LHS.getValueType());
4307       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
4308                          N->getOperand(0), LHS, RHS, TargetCC,
4309                          N->getOperand(4));
4310     }
4311     break;
4312   }
4313   case ISD::FCOPYSIGN: {
4314     EVT VT = N->getValueType(0);
4315     if (!VT.isVector())
4316       break;
4317     // There is a form of VFSGNJ which injects the negated sign of its second
4318     // operand. Try and bubble any FNEG up after the extend/round to produce
4319     // this optimized pattern. Avoid modifying cases where FP_ROUND and
4320     // TRUNC=1.
4321     SDValue In2 = N->getOperand(1);
4322     // Avoid cases where the extend/round has multiple uses, as duplicating
4323     // those is typically more expensive than removing a fneg.
4324     if (!In2.hasOneUse())
4325       break;
4326     if (In2.getOpcode() != ISD::FP_EXTEND &&
4327         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
4328       break;
4329     In2 = In2.getOperand(0);
4330     if (In2.getOpcode() != ISD::FNEG)
4331       break;
4332     SDLoc DL(N);
4333     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
4334     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
4335                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
4336   }
4337   }
4338 
4339   return SDValue();
4340 }
4341 
4342 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
4343     const SDNode *N, CombineLevel Level) const {
4344   // The following folds are only desirable if `(OP _, c1 << c2)` can be
4345   // materialised in fewer instructions than `(OP _, c1)`:
4346   //
4347   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4348   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
4349   SDValue N0 = N->getOperand(0);
4350   EVT Ty = N0.getValueType();
4351   if (Ty.isScalarInteger() &&
4352       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
4353     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4354     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
4355     if (C1 && C2) {
4356       const APInt &C1Int = C1->getAPIntValue();
4357       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
4358 
4359       // We can materialise `c1 << c2` into an add immediate, so it's "free",
4360       // and the combine should happen, to potentially allow further combines
4361       // later.
4362       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
4363           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
4364         return true;
4365 
4366       // We can materialise `c1` in an add immediate, so it's "free", and the
4367       // combine should be prevented.
4368       if (C1Int.getMinSignedBits() <= 64 &&
4369           isLegalAddImmediate(C1Int.getSExtValue()))
4370         return false;
4371 
4372       // Neither constant will fit into an immediate, so find materialisation
4373       // costs.
4374       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
4375                                               Subtarget.is64Bit());
4376       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
4377           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
4378 
4379       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
4380       // combine should be prevented.
4381       if (C1Cost < ShiftedC1Cost)
4382         return false;
4383     }
4384   }
4385   return true;
4386 }
4387 
4388 bool RISCVTargetLowering::targetShrinkDemandedConstant(
4389     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4390     TargetLoweringOpt &TLO) const {
4391   // Delay this optimization as late as possible.
4392   if (!TLO.LegalOps)
4393     return false;
4394 
4395   EVT VT = Op.getValueType();
4396   if (VT.isVector())
4397     return false;
4398 
4399   // Only handle AND for now.
4400   if (Op.getOpcode() != ISD::AND)
4401     return false;
4402 
4403   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4404   if (!C)
4405     return false;
4406 
4407   const APInt &Mask = C->getAPIntValue();
4408 
4409   // Clear all non-demanded bits initially.
4410   APInt ShrunkMask = Mask & DemandedBits;
4411 
4412   // If the shrunk mask fits in sign extended 12 bits, let the target
4413   // independent code apply it.
4414   if (ShrunkMask.isSignedIntN(12))
4415     return false;
4416 
4417   // Try to make a smaller immediate by setting undemanded bits.
4418 
4419   // We need to be able to make a negative number through a combination of mask
4420   // and undemanded bits.
4421   APInt ExpandedMask = Mask | ~DemandedBits;
4422   if (!ExpandedMask.isNegative())
4423     return false;
4424 
4425   // What is the fewest number of bits we need to represent the negative number.
4426   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
4427 
4428   // Try to make a 12 bit negative immediate. If that fails try to make a 32
4429   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
4430   APInt NewMask = ShrunkMask;
4431   if (MinSignedBits <= 12)
4432     NewMask.setBitsFrom(11);
4433   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
4434     NewMask.setBitsFrom(31);
4435   else
4436     return false;
4437 
4438   // Sanity check that our new mask is a subset of the demanded mask.
4439   assert(NewMask.isSubsetOf(ExpandedMask));
4440 
4441   // If we aren't changing the mask, just return true to keep it and prevent
4442   // the caller from optimizing.
4443   if (NewMask == Mask)
4444     return true;
4445 
4446   // Replace the constant with the new mask.
4447   SDLoc DL(Op);
4448   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
4449   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
4450   return TLO.CombineTo(Op, NewOp);
4451 }
4452 
4453 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
4454                                                         KnownBits &Known,
4455                                                         const APInt &DemandedElts,
4456                                                         const SelectionDAG &DAG,
4457                                                         unsigned Depth) const {
4458   unsigned BitWidth = Known.getBitWidth();
4459   unsigned Opc = Op.getOpcode();
4460   assert((Opc >= ISD::BUILTIN_OP_END ||
4461           Opc == ISD::INTRINSIC_WO_CHAIN ||
4462           Opc == ISD::INTRINSIC_W_CHAIN ||
4463           Opc == ISD::INTRINSIC_VOID) &&
4464          "Should use MaskedValueIsZero if you don't know whether Op"
4465          " is a target node!");
4466 
4467   Known.resetAll();
4468   switch (Opc) {
4469   default: break;
4470   case RISCVISD::SELECT_CC: {
4471     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
4472     // If we don't know any bits, early out.
4473     if (Known.isUnknown())
4474       break;
4475     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
4476 
4477     // Only known if known in both the LHS and RHS.
4478     Known = KnownBits::commonBits(Known, Known2);
4479     break;
4480   }
4481   case RISCVISD::REMUW: {
4482     KnownBits Known2;
4483     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4484     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
4485     // We only care about the lower 32 bits.
4486     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
4487     // Restore the original width by sign extending.
4488     Known = Known.sext(BitWidth);
4489     break;
4490   }
4491   case RISCVISD::DIVUW: {
4492     KnownBits Known2;
4493     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4494     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
4495     // We only care about the lower 32 bits.
4496     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
4497     // Restore the original width by sign extending.
4498     Known = Known.sext(BitWidth);
4499     break;
4500   }
4501   case RISCVISD::READ_VLENB:
4502     // We assume VLENB is at least 8 bytes.
4503     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
4504     Known.Zero.setLowBits(3);
4505     break;
4506   }
4507 }
4508 
4509 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
4510     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4511     unsigned Depth) const {
4512   switch (Op.getOpcode()) {
4513   default:
4514     break;
4515   case RISCVISD::SLLW:
4516   case RISCVISD::SRAW:
4517   case RISCVISD::SRLW:
4518   case RISCVISD::DIVW:
4519   case RISCVISD::DIVUW:
4520   case RISCVISD::REMUW:
4521   case RISCVISD::ROLW:
4522   case RISCVISD::RORW:
4523   case RISCVISD::GREVIW:
4524   case RISCVISD::GORCIW:
4525   case RISCVISD::FSLW:
4526   case RISCVISD::FSRW:
4527     // TODO: As the result is sign-extended, this is conservatively correct. A
4528     // more precise answer could be calculated for SRAW depending on known
4529     // bits in the shift amount.
4530     return 33;
4531   case RISCVISD::SHFLI: {
4532     // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
4533     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
4534     // will stay within the upper 32 bits. If there were more than 32 sign bits
4535     // before there will be at least 33 sign bits after.
4536     if (Op.getValueType() == MVT::i64 &&
4537         (Op.getConstantOperandVal(1) & 0x10) == 0) {
4538       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
4539       if (Tmp > 32)
4540         return 33;
4541     }
4542     break;
4543   }
4544   case RISCVISD::VMV_X_S:
4545     // The number of sign bits of the scalar result is computed by obtaining the
4546     // element type of the input vector operand, subtracting its width from the
4547     // XLEN, and then adding one (sign bit within the element type). If the
4548     // element type is wider than XLen, the least-significant XLEN bits are
4549     // taken.
4550     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
4551       return 1;
4552     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
4553   }
4554 
4555   return 1;
4556 }
4557 
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the three CSR reads plus the retry branch; DoneMBB receives
  // everything that followed the pseudo in the original block.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1 = x0 reads the CSR without modifying it.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // If the two CYCLEH reads disagree, the low word wrapped mid-sequence;
  // branch back and retry the whole read.
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
4619 
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  // Split an FPR64 into two GPR halves by spilling it to a stack slot and
  // reloading the slot as two 32-bit words: the word at offset 0 into
  // operand 0 (lo) and the word at offset 4 into operand 1 (hi).
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // Scratch stack slot reserved for FPR64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  // Describe the two word-sized loads; the base slot is 8-byte aligned.
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
4652 
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  // Combine two GPR halves into an FPR64 by storing both 32-bit words to a
  // stack slot (operand 1 at offset 0, operand 2 at offset 4) and reloading
  // the slot as a single f64 into operand 0.
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // Scratch stack slot reserved for FPR64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  // Describe the two word-sized stores; the base slot is 8-byte aligned.
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
4687 
4688 static bool isSelectPseudo(MachineInstr &MI) {
4689   switch (MI.getOpcode()) {
4690   default:
4691     return false;
4692   case RISCV::Select_GPR_Using_CC_GPR:
4693   case RISCV::Select_FPR16_Using_CC_GPR:
4694   case RISCV::Select_FPR32_Using_CC_GPR:
4695   case RISCV::Select_FPR64_Using_CC_GPR:
4696     return true;
4697   }
4698 }
4699 
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Walk forward from MI to find the last select pseudo that can share this
  // branch, applying the safety rules described above.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // A select joins the sequence only if it tests the identical condition
      // and neither of its true/false inputs is the result of an earlier
      // select in the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // Any other instruction ends the sequence if it would be unsafe to
      // leave in HeadMBB: it has side effects, touches memory, or reads a
      // select result.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch. When the condition holds we branch straight
  // to TailMBB, so the true value reaches the PHI via the HeadMBB edge.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // The PHIs inserted above invalidate the NoPHIs property.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
4822 
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                    int VLIndex, unsigned SEWIndex,
                                    RISCVVLMUL VLMul, bool ForceTailAgnostic) {
  // Insert a vsetvli/vsetivli before the RVV pseudo MI to establish the
  // VL/VTYPE configuration it needs. A negative VLIndex means the pseudo has
  // no VL operand and VL must be left unchanged.
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Decode the SEW immediate (in bits) into the log2(bytes) VTYPE encoding.
  unsigned SEW = MI.getOperand(SEWIndex).getImm();
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));

  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Emit the vsetvli/vsetivli (without its VTYPE immediate, added below) and
  // return the builder so the caller can append it.
  auto BuildVSETVLI = [&]() {
    if (VLIndex >= 0) {
      Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
      Register VLReg = MI.getOperand(VLIndex).getReg();

      // VL might be a compile time constant, but isel would have to put it
      // in a register. See if VL comes from an ADDI X0, imm.
      if (VLReg.isVirtual()) {
        MachineInstr *Def = MRI.getVRegDef(VLReg);
        if (Def && Def->getOpcode() == RISCV::ADDI &&
            Def->getOperand(1).getReg() == RISCV::X0 &&
            Def->getOperand(2).isImm()) {
          uint64_t Imm = Def->getOperand(2).getImm();
          // VSETIVLI allows a 5-bit zero extended immediate.
          if (isUInt<5>(Imm))
            return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
                .addReg(DestReg, RegState::Define | RegState::Dead)
                .addImm(Imm);
        }
      }

      return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
          .addReg(DestReg, RegState::Define | RegState::Dead)
          .addReg(VLReg);
    }

    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
    return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill);
  };

  MachineInstrBuilder MIB = BuildVSETVLI();

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0 like
  // vmv.s.x or reductions so use agnostic there to match the common case.
  // FIXME: This is conservatively correct, but we might want to detect that
  // the input is undefined.
  bool TailAgnostic = true;
  unsigned UseOpIdx;
  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef())
      TailAgnostic = true;
  }

  // For simplicity we reuse the vtype representation here.
  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
                                     /*TailAgnostic*/ TailAgnostic,
                                     /*MaskAgnostic*/ false));

  // Remove (now) redundant operands from pseudo
  if (VLIndex >= 0) {
    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
    MI.getOperand(VLIndex).setIsKill(false);
  }

  return BB;
}
4900 
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // RVV pseudos encode their SEW/VL/LMUL configuration in TSFlags; handle
  // them uniformly by inserting a vsetvli, instead of dispatching on opcode.
  if (TSFlags & RISCVII::HasSEWOpMask) {
    // The SEW operand is always the last explicit operand; when a VL operand
    // is present it immediately precedes SEW.
    unsigned NumOperands = MI.getNumExplicitOperands();
    int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
    unsigned SEWIndex = NumOperands - 1;
    bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;

    RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
                                               RISCVII::VLMulShift);
    return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
  }

  // Non-vector pseudos are dispatched by opcode to their expansion helpers.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWrite is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}
4935 
4936 // Calling Convention Implementation.
4937 // The expectations for frontend ABI lowering vary from target to target.
4938 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
4939 // details, but this is a longer term goal. For now, we simply try to keep the
4940 // role of the frontend as simple and well-defined as possible. The rules can
4941 // be summarised as:
4942 // * Never split up large scalar arguments. We handle them here.
4943 // * If a hardfloat calling convention is being used, and the struct may be
4944 // passed in a pair of registers (fp+fp, int+fp), and both registers are
4945 // available, then pass as two separate arguments. If either the GPRs or FPRs
4946 // are exhausted, then pass according to the rule below.
4947 // * If a struct could never be passed in registers or directly in a stack
4948 // slot (as it is larger than 2*XLEN and the floating point rules don't
4949 // apply), then pass it using a pointer with the byval attribute.
4950 // * If a struct is less than 2*XLEN, then coerce to either a two-element
4951 // word-sized array or a 2*XLEN scalar (depending on alignment).
4952 // * The frontend can determine whether a struct is returned by reference or
4953 // not based on its size and fields. If it will be returned by reference, the
4954 // frontend must modify the prototype so a pointer with the sret annotation is
4955 // passed as the first argument. This is not necessary for large scalar
4956 // returns.
4957 // * Struct return values and varargs should be coerced to structs containing
4958 // register-size fields in the same situations they would be for fixed
4959 // arguments.
4960 
// GPRs used for argument passing: a0-a7 (x10-x17).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// Argument FPRs fa0-fa7, as their 16-bit (half) register views.
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
// Argument FPRs fa0-fa7, as their 32-bit (float) register views.
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
// Argument FPRs fa0-fa7, as full 64-bit (double) registers.
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
// LMUL=2 register groups within v8-v23.
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
// LMUL=4 register groups within v8-v23.
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
// LMUL=8 register groups within v8-v23.
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
4988 
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
//
// VA1/ArgFlags1 describe the first (pending) half; ValNo2/ValVT2/LocVT2
// describe the second half. ArgFlags2 is currently unused. Always returns
// false (i.e. assignment succeeded).
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    // The first slot honours the original type's alignment if it is stricter
    // than XLEN; the second slot is only XLEN-aligned.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
5027 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// Assigns a register or stack location for the value numbered ValNo and
// records it in State (directly, or via State's pending-location list for
// split arguments). IsFixed is false for variadic arguments; IsRet is true
// when analysing return values; OrigTy is the original IR type of the
// argument (may be null); FirstMaskArgument, if set, identifies the vector
// mask argument that should be assigned to V0.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split into more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs left: the whole f64 goes on the stack, 8-byte aligned.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve either a second GPR or a 4-byte stack slot for the high half;
    // only the low half's location is recorded here.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    // Pick the register list matching the value's register class (LMUL).
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      LocInfo = CCValAssign::Indirect;
      // Try using a GPR to pass the address
      Reg = State.AllocateReg(ArgGPRs);
      LocVT = XLenVT;
    }
  } else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All parts of the split share the single location assigned above.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
5246 
5247 template <typename ArgTy>
5248 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
5249   for (const auto &ArgIdx : enumerate(Args)) {
5250     MVT ArgVT = ArgIdx.value().VT;
5251     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
5252       return ArgIdx.index();
5253   }
5254   return None;
5255 }
5256 
5257 void RISCVTargetLowering::analyzeInputArgs(
5258     MachineFunction &MF, CCState &CCInfo,
5259     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
5260   unsigned NumArgs = Ins.size();
5261   FunctionType *FType = MF.getFunction().getFunctionType();
5262 
5263   Optional<unsigned> FirstMaskArgument;
5264   if (Subtarget.hasStdExtV())
5265     FirstMaskArgument = preAssignMask(Ins);
5266 
5267   for (unsigned i = 0; i != NumArgs; ++i) {
5268     MVT ArgVT = Ins[i].VT;
5269     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
5270 
5271     Type *ArgTy = nullptr;
5272     if (IsRet)
5273       ArgTy = FType->getReturnType();
5274     else if (Ins[i].isOrigArg())
5275       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5276 
5277     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5278     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
5279                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
5280                  FirstMaskArgument)) {
5281       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
5282                         << EVT(ArgVT).getEVTString() << '\n');
5283       llvm_unreachable(nullptr);
5284     }
5285   }
5286 }
5287 
5288 void RISCVTargetLowering::analyzeOutputArgs(
5289     MachineFunction &MF, CCState &CCInfo,
5290     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5291     CallLoweringInfo *CLI) const {
5292   unsigned NumArgs = Outs.size();
5293 
5294   Optional<unsigned> FirstMaskArgument;
5295   if (Subtarget.hasStdExtV())
5296     FirstMaskArgument = preAssignMask(Outs);
5297 
5298   for (unsigned i = 0; i != NumArgs; i++) {
5299     MVT ArgVT = Outs[i].VT;
5300     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5301     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5302 
5303     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5304     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
5305                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
5306                  FirstMaskArgument)) {
5307       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
5308                         << EVT(ArgVT).getEVTString() << "\n");
5309       llvm_unreachable(nullptr);
5310     }
5311   }
5312 }
5313 
// Convert Val from its assigned location type (VA.getLocVT()) to a ValVT.
// Should not be called for CCValAssign::Indirect values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    // A fixed-length vector passed in a scalable container must be extracted
    // back out to its fixed-length type.
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    // FP values passed in a wider integer register use a target-specific move
    // node (f16 from an XLen GPR, f32 from an i64 on RV64); same-width
    // conversions are plain bitcasts.
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
5337 
5338 // The caller is responsible for loading the full value if the argument is
5339 // passed with CCValAssign::Indirect.
5340 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5341                                 const CCValAssign &VA, const SDLoc &DL,
5342                                 const RISCVTargetLowering &TLI) {
5343   MachineFunction &MF = DAG.getMachineFunction();
5344   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5345   EVT LocVT = VA.getLocVT();
5346   SDValue Val;
5347   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5348   Register VReg = RegInfo.createVirtualRegister(RC);
5349   RegInfo.addLiveIn(VA.getLocReg(), VReg);
5350   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5351 
5352   if (VA.getLocInfo() == CCValAssign::Indirect)
5353     return Val;
5354 
5355   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
5356 }
5357 
// Convert Val from ValVT to its assigned location type (VA.getLocVT()) —
// the inverse of convertLocVTToValVT, used for outgoing values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    // A fixed-length vector value is placed into its scalable container type.
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    // FP values passed in a wider integer register use a target-specific
    // any-extending move node; same-width conversions are plain bitcasts.
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
5381 
// Unpack an argument that was assigned a stack location: materialise a fixed
// frame object over the incoming slot and load from it. The caller is
// responsible for loading the full value if the argument is passed with
// CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  // All LocInfo kinds handled here load the value as-is; the switch exists
  // to catch any unexpected LocInfo.
  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
5411 
// Unpack an incoming f64 argument on RV32 that was assigned to GPRs and/or
// the stack (soft-float ABI, or FPR argument registers exhausted). Handles
// the three layouts CC_RISCV can produce: fully on the stack, in a pair of
// GPRs, or split between the last GPR (a7/X17) and the stack.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // The low half is in the GPR recorded by the calling convention.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Reassemble the two i32 halves into a single f64 value.
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
5448 
// FastCC has less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have benefit for some cases.
//
// Implements CallingConv::Fast: like the standard convention, but with extra
// registers beyond a0-a7/fa0-fa7 made available for argument passing.
// Returns true if no location could be assigned (CC didn't match).
static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // X5 and X6 might be used for save-restore libcall.
    static const MCPhysReg GPRList[] = {
        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
        RISCV::X29, RISCV::X30, RISCV::X31};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No register available: fall back to the stack.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  return true; // CC didn't match.
}
5517 
// Implements the GHC (Glasgow Haskell Compiler) calling convention, which
// maps STG virtual registers onto fixed callee-saved machine registers.
// There is no stack fallback: running out of registers is a fatal error.
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true; // Not reached; report_fatal_error does not return.
}
5561 
// Transform physical registers into virtual registers.
//
// Lowers the incoming formal arguments of the current function: assigns
// locations via the appropriate calling convention, copies register
// arguments into virtual registers, loads stack arguments, and — for vararg
// functions — spills the unused argument registers to the vararg save area.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // CC_RISCV_GHC assigns f32/f64 values to FPRs, so both F and D are
    // required.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Consume all remaining parts of the same original argument, loading
      // each at its offset from the base address.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
5715 
5716 /// isEligibleForTailCallOptimization - Check whether the call is eligible
5717 /// for tail call optimization.
5718 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
5719 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
5720     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5721     const SmallVector<CCValAssign, 16> &ArgLocs) const {
5722 
5723   auto &Callee = CLI.Callee;
5724   auto CalleeCC = CLI.CallConv;
5725   auto &Outs = CLI.Outs;
5726   auto &Caller = MF.getFunction();
5727   auto CallerCC = Caller.getCallingConv();
5728 
5729   // Exception-handling functions need a special set of instructions to
5730   // indicate a return to the hardware. Tail-calling another function would
5731   // probably break this.
5732   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
5733   // should be expanded as new function attributes are introduced.
5734   if (Caller.hasFnAttribute("interrupt"))
5735     return false;
5736 
5737   // Do not tail call opt if the stack is used to pass parameters.
5738   if (CCInfo.getNextStackOffset() != 0)
5739     return false;
5740 
5741   // Do not tail call opt if any parameters need to be passed indirectly.
5742   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
5743   // passed indirectly. So the address of the value will be passed in a
5744   // register, or if not available, then the address is put on the stack. In
5745   // order to pass indirectly, space on the stack often needs to be allocated
5746   // in order to store the value. In this case the CCInfo.getNextStackOffset()
5747   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
5748   // are passed CCValAssign::Indirect.
5749   for (auto &VA : ArgLocs)
5750     if (VA.getLocInfo() == CCValAssign::Indirect)
5751       return false;
5752 
5753   // Do not tail call opt if either caller or callee uses struct return
5754   // semantics.
5755   auto IsCallerStructRet = Caller.hasStructRetAttr();
5756   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5757   if (IsCallerStructRet || IsCalleeStructRet)
5758     return false;
5759 
5760   // Externally-defined functions with weak linkage should not be
5761   // tail-called. The behaviour of branch instructions in this situation (as
5762   // used for tail calls) is implementation-defined, so we cannot rely on the
5763   // linker replacing the tail call with a return.
5764   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5765     const GlobalValue *GV = G->getGlobal();
5766     if (GV->hasExternalWeakLinkage())
5767       return false;
5768   }
5769 
5770   // The callee has to preserve all registers the caller needs to preserve.
5771   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5772   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5773   if (CalleeCC != CallerCC) {
5774     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5775     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5776       return false;
5777   }
5778 
5779   // Byval parameters hand the function a pointer directly into the stack area
5780   // we want to reuse during a tail call. Working around this *is* possible
5781   // but less efficient and uglier in LowerCall.
5782   for (auto &Arg : Outs)
5783     if (Arg.Flags.isByVal())
5784       return false;
5785 
5786   return true;
5787 }
5788 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
//
// Arguments are assigned locations by the calling-convention routines
// (CC_RISCV_FastCC, CC_RISCV_GHC or analyzeOutputArgs), copied into the
// chosen registers / stack slots, and the call node is glued to those copies
// so that register allocation keeps them live across the call.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call. A tail call requested at
  // the IR level may be demoted to a normal call here.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args. Note these copies are emitted before
  // CALLSEQ_START, so they are not considered part of the call sequence.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  // j indexes ByValArgs in lock-step with the byval entries of Outs.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case:
    // the value is split into two i32 halves.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // RegLo is a7, the last GPR argument register, so the second half of
        // the f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store (the slot is at offset 0 from sp).
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Consume all remaining parts of this original argument; note the inner
      // ++i advances the outer loop past them.
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together, so the
  // scheduler cannot separate the register copies from the call.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    // Non-dso-local symbols go through the PLT.
    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // A tail call is emitted as a terminator-like TAIL node and needs no
  // CALLSEQ_END or return-value copies.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // An f64 returned on RV32 with a soft float ABI comes back as two i32
    // halves in a GPR pair; reassemble it with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
6071 
6072 bool RISCVTargetLowering::CanLowerReturn(
6073     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
6074     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6075   SmallVector<CCValAssign, 16> RVLocs;
6076   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
6077 
6078   Optional<unsigned> FirstMaskArgument;
6079   if (Subtarget.hasStdExtV())
6080     FirstMaskArgument = preAssignMask(Outs);
6081 
6082   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6083     MVT VT = Outs[i].VT;
6084     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6085     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
6086     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
6087                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
6088                  *this, FirstMaskArgument))
6089       return false;
6090   }
6091   return true;
6092 }
6093 
// Lower the return of a function: assign each returned value a register
// location, emit the glued copy-to-reg sequence, and produce the appropriate
// return node (RET_FLAG, or the privileged *RET_FLAG variants for interrupt
// handlers).
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers. The copies are glued
  // together so the scheduler cannot separate them from the return.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI: split the value
      // into two i32 halves returned in an even/odd GPR pair.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      // Diagnose returns through registers the user reserved
      // (e.g. via -ffixed-<reg>).
      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions
  // (uret/sret/mret), selected by the value of the "interrupt" attribute.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
6195 
6196 void RISCVTargetLowering::validateCCReservedRegs(
6197     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
6198     MachineFunction &MF) const {
6199   const Function &F = MF.getFunction();
6200   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
6201 
6202   if (llvm::any_of(Regs, [&STI](auto Reg) {
6203         return STI.isRegisterReservedByUser(Reg.first);
6204       }))
6205     F.getContext().diagnose(DiagnosticInfoUnsupported{
6206         F, "Argument register required, but has been reserved."});
6207 }
6208 
6209 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
6210   return CI->isTailCall();
6211 }
6212 
// Return a human-readable name for a RISCV-specific SelectionDAG node opcode,
// used when dumping/printing DAGs. Returns nullptr for opcodes that are not
// RISCV target nodes.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    // FIRST_NUMBER is a sentinel, not a real node.
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(SHFLI)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_XF_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
6330 
6331 /// getConstraintType - Given a constraint letter, return the type of
6332 /// constraint it is for this target.
6333 RISCVTargetLowering::ConstraintType
6334 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
6335   if (Constraint.size() == 1) {
6336     switch (Constraint[0]) {
6337     default:
6338       break;
6339     case 'f':
6340     case 'v':
6341       return C_RegisterClass;
6342     case 'I':
6343     case 'J':
6344     case 'K':
6345       return C_Immediate;
6346     case 'A':
6347       return C_Memory;
6348     }
6349   }
6350   return TargetLowering::getConstraintType(Constraint);
6351 }
6352 
6353 std::pair<unsigned, const TargetRegisterClass *>
6354 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
6355                                                   StringRef Constraint,
6356                                                   MVT VT) const {
6357   // First, see if this is a constraint that directly corresponds to a
6358   // RISCV register class.
6359   if (Constraint.size() == 1) {
6360     switch (Constraint[0]) {
6361     case 'r':
6362       return std::make_pair(0U, &RISCV::GPRRegClass);
6363     case 'f':
6364       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
6365         return std::make_pair(0U, &RISCV::FPR16RegClass);
6366       if (Subtarget.hasStdExtF() && VT == MVT::f32)
6367         return std::make_pair(0U, &RISCV::FPR32RegClass);
6368       if (Subtarget.hasStdExtD() && VT == MVT::f64)
6369         return std::make_pair(0U, &RISCV::FPR64RegClass);
6370       break;
6371     case 'v':
6372       for (const auto *RC :
6373            {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
6374             &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
6375         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
6376           return std::make_pair(0U, RC);
6377       }
6378       break;
6379     default:
6380       break;
6381     }
6382   }
6383 
6384   // Clang will correctly decode the usage of register name aliases into their
6385   // official names. However, other frontends like `rustc` do not. This allows
6386   // users of these frontends to use the ABI names for registers in LLVM-style
6387   // register constraints.
6388   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
6389                                .Case("{zero}", RISCV::X0)
6390                                .Case("{ra}", RISCV::X1)
6391                                .Case("{sp}", RISCV::X2)
6392                                .Case("{gp}", RISCV::X3)
6393                                .Case("{tp}", RISCV::X4)
6394                                .Case("{t0}", RISCV::X5)
6395                                .Case("{t1}", RISCV::X6)
6396                                .Case("{t2}", RISCV::X7)
6397                                .Cases("{s0}", "{fp}", RISCV::X8)
6398                                .Case("{s1}", RISCV::X9)
6399                                .Case("{a0}", RISCV::X10)
6400                                .Case("{a1}", RISCV::X11)
6401                                .Case("{a2}", RISCV::X12)
6402                                .Case("{a3}", RISCV::X13)
6403                                .Case("{a4}", RISCV::X14)
6404                                .Case("{a5}", RISCV::X15)
6405                                .Case("{a6}", RISCV::X16)
6406                                .Case("{a7}", RISCV::X17)
6407                                .Case("{s2}", RISCV::X18)
6408                                .Case("{s3}", RISCV::X19)
6409                                .Case("{s4}", RISCV::X20)
6410                                .Case("{s5}", RISCV::X21)
6411                                .Case("{s6}", RISCV::X22)
6412                                .Case("{s7}", RISCV::X23)
6413                                .Case("{s8}", RISCV::X24)
6414                                .Case("{s9}", RISCV::X25)
6415                                .Case("{s10}", RISCV::X26)
6416                                .Case("{s11}", RISCV::X27)
6417                                .Case("{t3}", RISCV::X28)
6418                                .Case("{t4}", RISCV::X29)
6419                                .Case("{t5}", RISCV::X30)
6420                                .Case("{t6}", RISCV::X31)
6421                                .Default(RISCV::NoRegister);
6422   if (XRegFromAlias != RISCV::NoRegister)
6423     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
6424 
6425   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
6426   // TableGen record rather than the AsmName to choose registers for InlineAsm
6427   // constraints, plus we want to match those names to the widest floating point
6428   // register type available, manually select floating point registers here.
6429   //
6430   // The second case is the ABI name of the register, so that frontends can also
6431   // use the ABI names in register constraint lists.
6432   if (Subtarget.hasStdExtF()) {
6433     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
6434                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
6435                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
6436                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
6437                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
6438                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
6439                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
6440                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
6441                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
6442                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
6443                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
6444                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
6445                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
6446                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
6447                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
6448                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
6449                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
6450                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
6451                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
6452                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
6453                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
6454                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
6455                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
6456                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
6457                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
6458                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
6459                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
6460                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
6461                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
6462                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
6463                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
6464                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
6465                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
6466                         .Default(RISCV::NoRegister);
6467     if (FReg != RISCV::NoRegister) {
6468       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
6469       if (Subtarget.hasStdExtD()) {
6470         unsigned RegNo = FReg - RISCV::F0_F;
6471         unsigned DReg = RISCV::F0_D + RegNo;
6472         return std::make_pair(DReg, &RISCV::FPR64RegClass);
6473       }
6474       return std::make_pair(FReg, &RISCV::FPR32RegClass);
6475     }
6476   }
6477 
6478   if (Subtarget.hasStdExtV()) {
6479     Register VReg = StringSwitch<Register>(Constraint.lower())
6480                         .Case("{v0}", RISCV::V0)
6481                         .Case("{v1}", RISCV::V1)
6482                         .Case("{v2}", RISCV::V2)
6483                         .Case("{v3}", RISCV::V3)
6484                         .Case("{v4}", RISCV::V4)
6485                         .Case("{v5}", RISCV::V5)
6486                         .Case("{v6}", RISCV::V6)
6487                         .Case("{v7}", RISCV::V7)
6488                         .Case("{v8}", RISCV::V8)
6489                         .Case("{v9}", RISCV::V9)
6490                         .Case("{v10}", RISCV::V10)
6491                         .Case("{v11}", RISCV::V11)
6492                         .Case("{v12}", RISCV::V12)
6493                         .Case("{v13}", RISCV::V13)
6494                         .Case("{v14}", RISCV::V14)
6495                         .Case("{v15}", RISCV::V15)
6496                         .Case("{v16}", RISCV::V16)
6497                         .Case("{v17}", RISCV::V17)
6498                         .Case("{v18}", RISCV::V18)
6499                         .Case("{v19}", RISCV::V19)
6500                         .Case("{v20}", RISCV::V20)
6501                         .Case("{v21}", RISCV::V21)
6502                         .Case("{v22}", RISCV::V22)
6503                         .Case("{v23}", RISCV::V23)
6504                         .Case("{v24}", RISCV::V24)
6505                         .Case("{v25}", RISCV::V25)
6506                         .Case("{v26}", RISCV::V26)
6507                         .Case("{v27}", RISCV::V27)
6508                         .Case("{v28}", RISCV::V28)
6509                         .Case("{v29}", RISCV::V29)
6510                         .Case("{v30}", RISCV::V30)
6511                         .Case("{v31}", RISCV::V31)
6512                         .Default(RISCV::NoRegister);
6513     if (VReg != RISCV::NoRegister) {
6514       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
6515         return std::make_pair(VReg, &RISCV::VMRegClass);
6516       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
6517         return std::make_pair(VReg, &RISCV::VRRegClass);
6518       for (const auto *RC :
6519            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
6520         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
6521           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
6522           return std::make_pair(VReg, RC);
6523         }
6524       }
6525     }
6526   }
6527 
6528   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6529 }
6530 
6531 unsigned
6532 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
6533   // Currently only support length 1 constraints.
6534   if (ConstraintCode.size() == 1) {
6535     switch (ConstraintCode[0]) {
6536     case 'A':
6537       return InlineAsm::Constraint_A;
6538     default:
6539       break;
6540     }
6541   }
6542 
6543   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
6544 }
6545 
6546 void RISCVTargetLowering::LowerAsmOperandForConstraint(
6547     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
6548     SelectionDAG &DAG) const {
6549   // Currently only support length 1 constraints.
6550   if (Constraint.length() == 1) {
6551     switch (Constraint[0]) {
6552     case 'I':
6553       // Validate & create a 12-bit signed immediate operand.
6554       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6555         uint64_t CVal = C->getSExtValue();
6556         if (isInt<12>(CVal))
6557           Ops.push_back(
6558               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
6559       }
6560       return;
6561     case 'J':
6562       // Validate & create an integer zero operand.
6563       if (auto *C = dyn_cast<ConstantSDNode>(Op))
6564         if (C->getZExtValue() == 0)
6565           Ops.push_back(
6566               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
6567       return;
6568     case 'K':
6569       // Validate & create a 5-bit unsigned immediate operand.
6570       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6571         uint64_t CVal = C->getZExtValue();
6572         if (isUInt<5>(CVal))
6573           Ops.push_back(
6574               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
6575       }
6576       return;
6577     default:
6578       break;
6579     }
6580   }
6581   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6582 }
6583 
6584 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
6585                                                    Instruction *Inst,
6586                                                    AtomicOrdering Ord) const {
6587   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
6588     return Builder.CreateFence(Ord);
6589   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
6590     return Builder.CreateFence(AtomicOrdering::Release);
6591   return nullptr;
6592 }
6593 
6594 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
6595                                                     Instruction *Inst,
6596                                                     AtomicOrdering Ord) const {
6597   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
6598     return Builder.CreateFence(AtomicOrdering::Acquire);
6599   return nullptr;
6600 }
6601 
6602 TargetLowering::AtomicExpansionKind
6603 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
6604   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
6605   // point operations can't be used in an lr/sc sequence without breaking the
6606   // forward-progress guarantee.
6607   if (AI->isFloatingPointOperation())
6608     return AtomicExpansionKind::CmpXChg;
6609 
6610   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
6611   if (Size == 8 || Size == 16)
6612     return AtomicExpansionKind::MaskedIntrinsic;
6613   return AtomicExpansionKind::None;
6614 }
6615 
6616 static Intrinsic::ID
6617 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
6618   if (XLen == 32) {
6619     switch (BinOp) {
6620     default:
6621       llvm_unreachable("Unexpected AtomicRMW BinOp");
6622     case AtomicRMWInst::Xchg:
6623       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
6624     case AtomicRMWInst::Add:
6625       return Intrinsic::riscv_masked_atomicrmw_add_i32;
6626     case AtomicRMWInst::Sub:
6627       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
6628     case AtomicRMWInst::Nand:
6629       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
6630     case AtomicRMWInst::Max:
6631       return Intrinsic::riscv_masked_atomicrmw_max_i32;
6632     case AtomicRMWInst::Min:
6633       return Intrinsic::riscv_masked_atomicrmw_min_i32;
6634     case AtomicRMWInst::UMax:
6635       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
6636     case AtomicRMWInst::UMin:
6637       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
6638     }
6639   }
6640 
6641   if (XLen == 64) {
6642     switch (BinOp) {
6643     default:
6644       llvm_unreachable("Unexpected AtomicRMW BinOp");
6645     case AtomicRMWInst::Xchg:
6646       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
6647     case AtomicRMWInst::Add:
6648       return Intrinsic::riscv_masked_atomicrmw_add_i64;
6649     case AtomicRMWInst::Sub:
6650       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
6651     case AtomicRMWInst::Nand:
6652       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
6653     case AtomicRMWInst::Max:
6654       return Intrinsic::riscv_masked_atomicrmw_max_i64;
6655     case AtomicRMWInst::Min:
6656       return Intrinsic::riscv_masked_atomicrmw_min_i64;
6657     case AtomicRMWInst::UMax:
6658       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
6659     case AtomicRMWInst::UMin:
6660       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
6661     }
6662   }
6663 
6664   llvm_unreachable("Unexpected XLen\n");
6665 }
6666 
// Lower a sub-word atomicrmw into a call to the matching
// riscv_masked_atomicrmw_* intrinsic, which operates on the aligned XLen-wide
// word containing the value. Returns the (possibly truncated) result value.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an XLen-wide integer constant.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 variants of the intrinsics take i64 operands, so on RV64 the
  // i32 inputs must first be sign-extended.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    // Computed as (XLen - ValWidth) - ShiftAmt == XLen - ShiftAmt - ValWidth.
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // The intrinsic returns an XLen-wide result; narrow back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
6709 
6710 TargetLowering::AtomicExpansionKind
6711 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
6712     AtomicCmpXchgInst *CI) const {
6713   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
6714   if (Size == 8 || Size == 16)
6715     return AtomicExpansionKind::MaskedIntrinsic;
6716   return AtomicExpansionKind::None;
6717 }
6718 
// Lower a sub-word cmpxchg into a call to the riscv_masked_cmpxchg_{i32,i64}
// intrinsic operating on the aligned XLen-wide word containing the value.
// Returns the (possibly truncated) loaded value.
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an XLen-wide integer constant.
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  // The i64 variant takes i64 operands, so on RV64 sign-extend the i32
  // inputs and switch to the i64 intrinsic.
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  // The intrinsic returns an XLen-wide result; narrow back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
6740 
6741 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
6742                                                      EVT VT) const {
6743   VT = VT.getScalarType();
6744 
6745   if (!VT.isSimple())
6746     return false;
6747 
6748   switch (VT.getSimpleVT().SimpleTy) {
6749   case MVT::f16:
6750     return Subtarget.hasStdExtZfh();
6751   case MVT::f32:
6752     return Subtarget.hasStdExtF();
6753   case MVT::f64:
6754     return Subtarget.hasStdExtD();
6755   default:
6756     break;
6757   }
6758 
6759   return false;
6760 }
6761 
6762 Register RISCVTargetLowering::getExceptionPointerRegister(
6763     const Constant *PersonalityFn) const {
6764   return RISCV::X10;
6765 }
6766 
6767 Register RISCVTargetLowering::getExceptionSelectorRegister(
6768     const Constant *PersonalityFn) const {
6769   return RISCV::X11;
6770 }
6771 
6772 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6773   // Return false to suppress the unnecessary extensions if the LibCall
6774   // arguments or return value is f32 type for LP64 ABI.
6775   RISCVABI::ABI ABI = Subtarget.getTargetABI();
6776   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
6777     return false;
6778 
6779   return true;
6780 }
6781 
6782 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
6783   if (Subtarget.is64Bit() && Type == MVT::i32)
6784     return true;
6785 
6786   return IsSigned;
6787 }
6788 
6789 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
6790                                                  SDValue C) const {
6791   // Check integral scalar types.
6792   if (VT.isScalarInteger()) {
6793     // Omit the optimization if the sub target has the M extension and the data
6794     // size exceeds XLen.
6795     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
6796       return false;
6797     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6798       // Break the MUL to a SLLI and an ADD/SUB.
6799       const APInt &Imm = ConstNode->getAPIntValue();
6800       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6801           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6802         return true;
6803       // Omit the following optimization if the sub target has the M extension
6804       // and the data size >= XLen.
6805       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
6806         return false;
6807       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
6808       // a pair of LUI/ADDI.
6809       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
6810         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
6811         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
6812             (1 - ImmS).isPowerOf2())
6813         return true;
6814       }
6815     }
6816   }
6817 
6818   return false;
6819 }
6820 
6821 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
6822   if (!Subtarget.useRVVForFixedLengthVectors())
6823     return false;
6824 
6825   if (!VT.isFixedLengthVector())
6826     return false;
6827 
6828   // Don't use RVV for vectors we cannot scalarize if required.
6829   switch (VT.getVectorElementType().SimpleTy) {
6830   // i1 is supported but has different rules.
6831   default:
6832     return false;
6833   case MVT::i1:
6834     // Masks can only use a single register.
6835     if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
6836       return false;
6837     break;
6838   case MVT::i8:
6839   case MVT::i16:
6840   case MVT::i32:
6841   case MVT::i64:
6842     break;
6843   case MVT::f16:
6844     if (!Subtarget.hasStdExtZfh())
6845       return false;
6846     break;
6847   case MVT::f32:
6848     if (!Subtarget.hasStdExtF())
6849       return false;
6850     break;
6851   case MVT::f64:
6852     if (!Subtarget.hasStdExtD())
6853       return false;
6854     break;
6855   }
6856 
6857   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
6858   // Don't use RVV for types that don't fit.
6859   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
6860     return false;
6861 
6862   // TODO: Perhaps an artificial restriction, but worth having whilst getting
6863   // the base fixed length RVV support in place.
6864   if (!VT.isPow2VectorType())
6865     return false;
6866 
6867   return true;
6868 }
6869 
6870 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
6871     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6872     bool *Fast) const {
6873   if (!VT.isScalableVector())
6874     return false;
6875 
6876   EVT ElemVT = VT.getVectorElementType();
6877   if (Alignment >= ElemVT.getStoreSize()) {
6878     if (Fast)
6879       *Fast = true;
6880     return true;
6881   }
6882 
6883   return false;
6884 }
6885 
// Target hook: when both the value and the part are scalable vectors and the
// part's (known-minimum) bit size is a whole multiple of the value's, place
// the value into the low elements of a single part register via
// INSERT_SUBVECTOR, bitcasting first if the element types differ. Returns
// false to fall back to the generic splitting logic.
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Compare known-minimum sizes; for scalable types the ratio of actual
    // sizes equals the ratio of the minimum sizes.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      if (ValueEltVT != PartEltVT) {
        // Element count that keeps the total bit size equal to ValueVT's.
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Insert the (possibly bitcast) value at index 0 of an undef part.
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
6914 
// Target hook: inverse of splitValueIntoRegisterParts. When both the value
// and the part are scalable vectors and the part's (known-minimum) bit size
// is a whole multiple of the value's, recover the value from the low
// elements of the single part via EXTRACT_SUBVECTOR, bitcasting back if the
// element types differ. Returns an empty SDValue to use the generic logic.
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Compare known-minimum sizes; for scalable types the ratio of actual
    // sizes equals the ratio of the minimum sizes.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      if (ValueEltVT != PartEltVT) {
        // Element count that keeps the total bit size equal to ValueVT's.
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      // Extract the low subvector, then bitcast back if needed.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}
6944 
6945 #define GET_REGISTER_MATCHER
6946 #include "RISCVGenAsmMatcher.inc"
6947 
6948 Register
6949 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
6950                                        const MachineFunction &MF) const {
6951   Register Reg = MatchRegisterAltName(RegName);
6952   if (Reg == RISCV::NoRegister)
6953     Reg = MatchRegisterName(RegName);
6954   if (Reg == RISCV::NoRegister)
6955     report_fatal_error(
6956         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6957   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6958   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
6959     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6960                              StringRef(RegName) + "\"."));
6961   return Reg;
6962 }
6963 
6964 namespace llvm {
6965 namespace RISCVVIntrinsicsTable {
6966 
6967 #define GET_RISCVVIntrinsicsTable_IMPL
6968 #include "RISCVGenSearchableTables.inc"
6969 
6970 } // namespace RISCVVIntrinsicsTable
6971 
6972 } // namespace llvm
6973