1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/CodeGen/ValueTypes.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/DiagnosticPrinter.h"
31 #include "llvm/IR/IntrinsicsRISCV.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58                 "doesn't support the F instruction set extension (ignoring "
59                           "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         MVT ContainerVT = getContainerForFixedLengthVector(VT);
147         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
148         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
149         addRegisterClass(VT, TRI.getRegClass(RCID));
150       };
151       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
152         if (useRVVForFixedLengthVectorVT(VT))
153           addRegClassForFixedVectors(VT);
154 
155       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
156         if (useRVVForFixedLengthVectorVT(VT))
157           addRegClassForFixedVectors(VT);
158     }
159   }
160 
161   // Compute derived properties from the register classes.
162   computeRegisterProperties(STI.getRegisterInfo());
163 
164   setStackPointerRegisterToSaveRestore(RISCV::X2);
165 
166   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
167     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
168 
169   // TODO: add all necessary setOperationAction calls.
170   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
171 
172   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
173   setOperationAction(ISD::BR_CC, XLenVT, Expand);
174   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
176 
177   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
178   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
179 
180   setOperationAction(ISD::VASTART, MVT::Other, Custom);
181   setOperationAction(ISD::VAARG, MVT::Other, Expand);
182   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
183   setOperationAction(ISD::VAEND, MVT::Other, Expand);
184 
185   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
186   if (!Subtarget.hasStdExtZbb()) {
187     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
188     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
189   }
190 
191   if (Subtarget.is64Bit()) {
192     setOperationAction(ISD::ADD, MVT::i32, Custom);
193     setOperationAction(ISD::SUB, MVT::i32, Custom);
194     setOperationAction(ISD::SHL, MVT::i32, Custom);
195     setOperationAction(ISD::SRA, MVT::i32, Custom);
196     setOperationAction(ISD::SRL, MVT::i32, Custom);
197 
198     setOperationAction(ISD::UADDO, MVT::i32, Custom);
199     setOperationAction(ISD::USUBO, MVT::i32, Custom);
200     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
201     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
202   }
203 
204   if (!Subtarget.hasStdExtM()) {
205     setOperationAction(ISD::MUL, XLenVT, Expand);
206     setOperationAction(ISD::MULHS, XLenVT, Expand);
207     setOperationAction(ISD::MULHU, XLenVT, Expand);
208     setOperationAction(ISD::SDIV, XLenVT, Expand);
209     setOperationAction(ISD::UDIV, XLenVT, Expand);
210     setOperationAction(ISD::SREM, XLenVT, Expand);
211     setOperationAction(ISD::UREM, XLenVT, Expand);
212   } else {
213     if (Subtarget.is64Bit()) {
214       setOperationAction(ISD::MUL, MVT::i32, Custom);
215       setOperationAction(ISD::MUL, MVT::i128, Custom);
216 
217       setOperationAction(ISD::SDIV, MVT::i8, Custom);
218       setOperationAction(ISD::UDIV, MVT::i8, Custom);
219       setOperationAction(ISD::UREM, MVT::i8, Custom);
220       setOperationAction(ISD::SDIV, MVT::i16, Custom);
221       setOperationAction(ISD::UDIV, MVT::i16, Custom);
222       setOperationAction(ISD::UREM, MVT::i16, Custom);
223       setOperationAction(ISD::SDIV, MVT::i32, Custom);
224       setOperationAction(ISD::UDIV, MVT::i32, Custom);
225       setOperationAction(ISD::UREM, MVT::i32, Custom);
226     } else {
227       setOperationAction(ISD::MUL, MVT::i64, Custom);
228     }
229   }
230 
231   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
232   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
234   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
235 
236   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
237   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
239 
240   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
241     if (Subtarget.is64Bit()) {
242       setOperationAction(ISD::ROTL, MVT::i32, Custom);
243       setOperationAction(ISD::ROTR, MVT::i32, Custom);
244     }
245   } else {
246     setOperationAction(ISD::ROTL, XLenVT, Expand);
247     setOperationAction(ISD::ROTR, XLenVT, Expand);
248   }
249 
250   if (Subtarget.hasStdExtZbp()) {
251     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
252     // more combining.
253     setOperationAction(ISD::BITREVERSE, XLenVT,   Custom);
254     setOperationAction(ISD::BSWAP,      XLenVT,   Custom);
255     setOperationAction(ISD::BITREVERSE, MVT::i8,  Custom);
256     // BSWAP i8 doesn't exist.
257     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
258     setOperationAction(ISD::BSWAP,      MVT::i16, Custom);
259 
260     if (Subtarget.is64Bit()) {
261       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
262       setOperationAction(ISD::BSWAP,      MVT::i32, Custom);
263     }
264   } else {
265     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
266     // pattern match it directly in isel.
267     setOperationAction(ISD::BSWAP, XLenVT,
268                        Subtarget.hasStdExtZbb() ? Legal : Expand);
269   }
270 
271   if (Subtarget.hasStdExtZbb()) {
272     setOperationAction(ISD::SMIN, XLenVT, Legal);
273     setOperationAction(ISD::SMAX, XLenVT, Legal);
274     setOperationAction(ISD::UMIN, XLenVT, Legal);
275     setOperationAction(ISD::UMAX, XLenVT, Legal);
276 
277     if (Subtarget.is64Bit()) {
278       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
279       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
280       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
281       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
282     }
283   } else {
284     setOperationAction(ISD::CTTZ, XLenVT, Expand);
285     setOperationAction(ISD::CTLZ, XLenVT, Expand);
286     setOperationAction(ISD::CTPOP, XLenVT, Expand);
287   }
288 
289   if (Subtarget.hasStdExtZbt()) {
290     setOperationAction(ISD::FSHL, XLenVT, Custom);
291     setOperationAction(ISD::FSHR, XLenVT, Custom);
292     setOperationAction(ISD::SELECT, XLenVT, Legal);
293 
294     if (Subtarget.is64Bit()) {
295       setOperationAction(ISD::FSHL, MVT::i32, Custom);
296       setOperationAction(ISD::FSHR, MVT::i32, Custom);
297     }
298   } else {
299     setOperationAction(ISD::SELECT, XLenVT, Custom);
300   }
301 
302   ISD::CondCode FPCCToExpand[] = {
303       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
304       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
305       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
306 
307   ISD::NodeType FPOpToExpand[] = {
308       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
309       ISD::FP_TO_FP16};
310 
311   if (Subtarget.hasStdExtZfh())
312     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
313 
314   if (Subtarget.hasStdExtZfh()) {
315     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
316     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
317     for (auto CC : FPCCToExpand)
318       setCondCodeAction(CC, MVT::f16, Expand);
319     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
320     setOperationAction(ISD::SELECT, MVT::f16, Custom);
321     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
322     for (auto Op : FPOpToExpand)
323       setOperationAction(Op, MVT::f16, Expand);
324   }
325 
326   if (Subtarget.hasStdExtF()) {
327     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
328     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
329     for (auto CC : FPCCToExpand)
330       setCondCodeAction(CC, MVT::f32, Expand);
331     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
332     setOperationAction(ISD::SELECT, MVT::f32, Custom);
333     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
334     for (auto Op : FPOpToExpand)
335       setOperationAction(Op, MVT::f32, Expand);
336     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
337     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
338   }
339 
340   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
341     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
342 
343   if (Subtarget.hasStdExtD()) {
344     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
345     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
346     for (auto CC : FPCCToExpand)
347       setCondCodeAction(CC, MVT::f64, Expand);
348     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
349     setOperationAction(ISD::SELECT, MVT::f64, Custom);
350     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
351     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
352     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
353     for (auto Op : FPOpToExpand)
354       setOperationAction(Op, MVT::f64, Expand);
355     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
356     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
357   }
358 
359   if (Subtarget.is64Bit()) {
360     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
361     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
362     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
363     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
364   }
365 
366   if (Subtarget.hasStdExtF()) {
367     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
368     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
369   }
370 
371   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
372   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
373   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
374   setOperationAction(ISD::JumpTable, XLenVT, Custom);
375 
376   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
377 
378   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
379   // Unfortunately this can't be determined just from the ISA naming string.
380   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
381                      Subtarget.is64Bit() ? Legal : Custom);
382 
383   setOperationAction(ISD::TRAP, MVT::Other, Legal);
384   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
385   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
386   if (Subtarget.is64Bit())
387     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
388 
389   if (Subtarget.hasStdExtA()) {
390     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
391     setMinCmpXchgSizeInBits(32);
392   } else {
393     setMaxAtomicSizeInBitsSupported(0);
394   }
395 
396   setBooleanContents(ZeroOrOneBooleanContent);
397 
398   if (Subtarget.hasStdExtV()) {
399     setBooleanVectorContents(ZeroOrOneBooleanContent);
400 
401     setOperationAction(ISD::VSCALE, XLenVT, Custom);
402 
403     // RVV intrinsics may have illegal operands.
404     // We also need to custom legalize vmv.x.s.
405     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
406     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
407     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
408     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
409     if (Subtarget.is64Bit()) {
410       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
411     } else {
412       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
413       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
414     }
415 
416     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
417 
418     static unsigned IntegerVPOps[] = {
419         ISD::VP_ADD,  ISD::VP_SUB,  ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
420         ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,   ISD::VP_XOR,
421         ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
422 
423     static unsigned FloatingPointVPOps[] = {ISD::VP_FADD, ISD::VP_FSUB,
424                                             ISD::VP_FMUL, ISD::VP_FDIV};
425 
426     if (!Subtarget.is64Bit()) {
427       // We must custom-lower certain vXi64 operations on RV32 due to the vector
428       // element type being illegal.
429       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
430       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
431 
432       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
433       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
434       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
435       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
436       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
437       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
438       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
439       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
440     }
441 
442     for (MVT VT : BoolVecVTs) {
443       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
444 
445       // Mask VTs are custom-expanded into a series of standard nodes
446       setOperationAction(ISD::TRUNCATE, VT, Custom);
447       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
448       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
449       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
450 
451       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
452       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
453 
454       setOperationAction(ISD::SELECT, VT, Expand);
455       setOperationAction(ISD::SELECT_CC, VT, Expand);
456       setOperationAction(ISD::VSELECT, VT, Expand);
457 
458       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
459       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
460       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
461 
462       // RVV has native int->float & float->int conversions where the
463       // element type sizes are within one power-of-two of each other. Any
464       // wider distances between type sizes have to be lowered as sequences
465       // which progressively narrow the gap in stages.
466       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
467       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
468       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
469       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
470 
471       // Expand all extending loads to types larger than this, and truncating
472       // stores from types larger than this.
473       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
474         setTruncStoreAction(OtherVT, VT, Expand);
475         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
476         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
477         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
478       }
479     }
480 
481     for (MVT VT : IntVecVTs) {
482       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
483       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
484 
485       setOperationAction(ISD::SMIN, VT, Legal);
486       setOperationAction(ISD::SMAX, VT, Legal);
487       setOperationAction(ISD::UMIN, VT, Legal);
488       setOperationAction(ISD::UMAX, VT, Legal);
489 
490       setOperationAction(ISD::ROTL, VT, Expand);
491       setOperationAction(ISD::ROTR, VT, Expand);
492 
493       // Custom-lower extensions and truncations from/to mask types.
494       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
495       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
496       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
497 
498       // RVV has native int->float & float->int conversions where the
499       // element type sizes are within one power-of-two of each other. Any
500       // wider distances between type sizes have to be lowered as sequences
501       // which progressively narrow the gap in stages.
502       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
503       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
504       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
505       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
506 
507       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
508       // nodes which truncate by one power of two at a time.
509       setOperationAction(ISD::TRUNCATE, VT, Custom);
510 
511       // Custom-lower insert/extract operations to simplify patterns.
512       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
513       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
514 
515       // Custom-lower reduction operations to set up the corresponding custom
516       // nodes' operands.
517       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
518       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
519       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
520       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
521       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
522       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
523       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
524       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
525 
526       for (unsigned VPOpc : IntegerVPOps)
527         setOperationAction(VPOpc, VT, Custom);
528 
529       setOperationAction(ISD::LOAD, VT, Custom);
530       setOperationAction(ISD::STORE, VT, Custom);
531 
532       setOperationAction(ISD::MLOAD, VT, Custom);
533       setOperationAction(ISD::MSTORE, VT, Custom);
534       setOperationAction(ISD::MGATHER, VT, Custom);
535       setOperationAction(ISD::MSCATTER, VT, Custom);
536 
537       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
538       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
539       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
540 
541       setOperationAction(ISD::SELECT, VT, Expand);
542       setOperationAction(ISD::SELECT_CC, VT, Expand);
543 
544       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
545       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
546 
547       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
548         setTruncStoreAction(VT, OtherVT, Expand);
549         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
550         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
551         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
552       }
553     }
554 
555     // Expand various CCs to best match the RVV ISA, which natively supports UNE
556     // but no other unordered comparisons, and supports all ordered comparisons
557     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
558     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
559     // and we pattern-match those back to the "original", swapping operands once
560     // more. This way we catch both operations and both "vf" and "fv" forms with
561     // fewer patterns.
562     ISD::CondCode VFPCCToExpand[] = {
563         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
564         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
565         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
566     };
567 
568     // Sets common operation actions on RVV floating-point vector types.
569     const auto SetCommonVFPActions = [&](MVT VT) {
570       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
571       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
572       // sizes are within one power-of-two of each other. Therefore conversions
573       // between vXf16 and vXf64 must be lowered as sequences which convert via
574       // vXf32.
575       setOperationAction(ISD::FP_ROUND, VT, Custom);
576       setOperationAction(ISD::FP_EXTEND, VT, Custom);
577       // Custom-lower insert/extract operations to simplify patterns.
578       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
579       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
580       // Expand various condition codes (explained above).
581       for (auto CC : VFPCCToExpand)
582         setCondCodeAction(CC, VT, Expand);
583 
584       setOperationAction(ISD::FMINNUM, VT, Legal);
585       setOperationAction(ISD::FMAXNUM, VT, Legal);
586 
587       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
588       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
589       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
590       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
591       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
592 
593       setOperationAction(ISD::LOAD, VT, Custom);
594       setOperationAction(ISD::STORE, VT, Custom);
595 
596       setOperationAction(ISD::MLOAD, VT, Custom);
597       setOperationAction(ISD::MSTORE, VT, Custom);
598       setOperationAction(ISD::MGATHER, VT, Custom);
599       setOperationAction(ISD::MSCATTER, VT, Custom);
600 
601       setOperationAction(ISD::SELECT, VT, Expand);
602       setOperationAction(ISD::SELECT_CC, VT, Expand);
603 
604       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
605       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
606       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
607 
608       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
609 
610       for (unsigned VPOpc : FloatingPointVPOps)
611         setOperationAction(VPOpc, VT, Custom);
612     };
613 
614     // Sets common extload/truncstore actions on RVV floating-point vector
615     // types.
616     const auto SetCommonVFPExtLoadTruncStoreActions =
617         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
618           for (auto SmallVT : SmallerVTs) {
619             setTruncStoreAction(VT, SmallVT, Expand);
620             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
621           }
622         };
623 
624     if (Subtarget.hasStdExtZfh())
625       for (MVT VT : F16VecVTs)
626         SetCommonVFPActions(VT);
627 
628     for (MVT VT : F32VecVTs) {
629       if (Subtarget.hasStdExtF())
630         SetCommonVFPActions(VT);
631       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
632     }
633 
634     for (MVT VT : F64VecVTs) {
635       if (Subtarget.hasStdExtD())
636         SetCommonVFPActions(VT);
637       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
638       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
639     }
640 
641     if (Subtarget.useRVVForFixedLengthVectors()) {
642       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
643         if (!useRVVForFixedLengthVectorVT(VT))
644           continue;
645 
646         // By default everything must be expanded.
647         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
648           setOperationAction(Op, VT, Expand);
649         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
650           setTruncStoreAction(VT, OtherVT, Expand);
651           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
652           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
653           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
654         }
655 
656         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
657         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
658         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
659 
660         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
661         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
662 
663         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
664         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
665 
666         setOperationAction(ISD::LOAD, VT, Custom);
667         setOperationAction(ISD::STORE, VT, Custom);
668 
669         setOperationAction(ISD::SETCC, VT, Custom);
670 
671         setOperationAction(ISD::TRUNCATE, VT, Custom);
672 
673         setOperationAction(ISD::BITCAST, VT, Custom);
674 
675         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
676         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
677         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
678 
679         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
680         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
681         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
682         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
683 
        // Operations below are different between masks and other vectors.
685         if (VT.getVectorElementType() == MVT::i1) {
686           setOperationAction(ISD::AND, VT, Custom);
687           setOperationAction(ISD::OR, VT, Custom);
688           setOperationAction(ISD::XOR, VT, Custom);
689           continue;
690         }
691 
692         // Use SPLAT_VECTOR to prevent type legalization from destroying the
693         // splats when type legalizing i64 scalar on RV32.
694         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
695         // improvements first.
696         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
697           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
698           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
699         }
700 
701         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
702         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
703 
704         setOperationAction(ISD::MLOAD, VT, Custom);
705         setOperationAction(ISD::MSTORE, VT, Custom);
706         setOperationAction(ISD::MGATHER, VT, Custom);
707         setOperationAction(ISD::MSCATTER, VT, Custom);
708         setOperationAction(ISD::ADD, VT, Custom);
709         setOperationAction(ISD::MUL, VT, Custom);
710         setOperationAction(ISD::SUB, VT, Custom);
711         setOperationAction(ISD::AND, VT, Custom);
712         setOperationAction(ISD::OR, VT, Custom);
713         setOperationAction(ISD::XOR, VT, Custom);
714         setOperationAction(ISD::SDIV, VT, Custom);
715         setOperationAction(ISD::SREM, VT, Custom);
716         setOperationAction(ISD::UDIV, VT, Custom);
717         setOperationAction(ISD::UREM, VT, Custom);
718         setOperationAction(ISD::SHL, VT, Custom);
719         setOperationAction(ISD::SRA, VT, Custom);
720         setOperationAction(ISD::SRL, VT, Custom);
721 
722         setOperationAction(ISD::SMIN, VT, Custom);
723         setOperationAction(ISD::SMAX, VT, Custom);
724         setOperationAction(ISD::UMIN, VT, Custom);
725         setOperationAction(ISD::UMAX, VT, Custom);
726         setOperationAction(ISD::ABS,  VT, Custom);
727 
728         setOperationAction(ISD::MULHS, VT, Custom);
729         setOperationAction(ISD::MULHU, VT, Custom);
730 
731         setOperationAction(ISD::VSELECT, VT, Custom);
732         setOperationAction(ISD::SELECT, VT, Expand);
733         setOperationAction(ISD::SELECT_CC, VT, Expand);
734 
735         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
736         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
737         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
738 
739         // Custom-lower reduction operations to set up the corresponding custom
740         // nodes' operands.
741         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
742         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
743         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
744         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
745         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
746 
747         for (unsigned VPOpc : IntegerVPOps)
748           setOperationAction(VPOpc, VT, Custom);
749       }
750 
751       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
752         if (!useRVVForFixedLengthVectorVT(VT))
753           continue;
754 
755         // By default everything must be expanded.
756         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
757           setOperationAction(Op, VT, Expand);
758         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
759           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
760           setTruncStoreAction(VT, OtherVT, Expand);
761         }
762 
763         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
764         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
765         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
766 
767         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
768         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
769         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
770         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
771 
772         setOperationAction(ISD::LOAD, VT, Custom);
773         setOperationAction(ISD::STORE, VT, Custom);
774         setOperationAction(ISD::MLOAD, VT, Custom);
775         setOperationAction(ISD::MSTORE, VT, Custom);
776         setOperationAction(ISD::MGATHER, VT, Custom);
777         setOperationAction(ISD::MSCATTER, VT, Custom);
778         setOperationAction(ISD::FADD, VT, Custom);
779         setOperationAction(ISD::FSUB, VT, Custom);
780         setOperationAction(ISD::FMUL, VT, Custom);
781         setOperationAction(ISD::FDIV, VT, Custom);
782         setOperationAction(ISD::FNEG, VT, Custom);
783         setOperationAction(ISD::FABS, VT, Custom);
784         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
785         setOperationAction(ISD::FSQRT, VT, Custom);
786         setOperationAction(ISD::FMA, VT, Custom);
787         setOperationAction(ISD::FMINNUM, VT, Custom);
788         setOperationAction(ISD::FMAXNUM, VT, Custom);
789 
790         setOperationAction(ISD::FP_ROUND, VT, Custom);
791         setOperationAction(ISD::FP_EXTEND, VT, Custom);
792 
793         for (auto CC : VFPCCToExpand)
794           setCondCodeAction(CC, VT, Expand);
795 
796         setOperationAction(ISD::VSELECT, VT, Custom);
797         setOperationAction(ISD::SELECT, VT, Expand);
798         setOperationAction(ISD::SELECT_CC, VT, Expand);
799 
800         setOperationAction(ISD::BITCAST, VT, Custom);
801 
802         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
803         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
804         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
805         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
806 
807         for (unsigned VPOpc : FloatingPointVPOps)
808           setOperationAction(VPOpc, VT, Custom);
809       }
810 
811       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
812       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
813       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
814       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
815       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
816       setOperationAction(ISD::BITCAST, MVT::f16, Custom);
817       setOperationAction(ISD::BITCAST, MVT::f32, Custom);
818       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
819     }
820   }
821 
822   // Function alignments.
823   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
824   setMinFunctionAlignment(FunctionAlignment);
825   setPrefFunctionAlignment(FunctionAlignment);
826 
827   setMinimumJumpTableEntries(5);
828 
829   // Jumps are expensive, compared to logic
830   setJumpIsExpensive();
831 
832   // We can use any register for comparisons
833   setHasMultipleConditionRegisters();
834 
835   setTargetDAGCombine(ISD::AND);
836   setTargetDAGCombine(ISD::OR);
837   setTargetDAGCombine(ISD::XOR);
838   if (Subtarget.hasStdExtV()) {
839     setTargetDAGCombine(ISD::FCOPYSIGN);
840     setTargetDAGCombine(ISD::MGATHER);
841     setTargetDAGCombine(ISD::MSCATTER);
842     setTargetDAGCombine(ISD::SRA);
843     setTargetDAGCombine(ISD::SRL);
844     setTargetDAGCombine(ISD::SHL);
845   }
846 }
847 
848 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
849                                             LLVMContext &Context,
850                                             EVT VT) const {
851   if (!VT.isVector())
852     return getPointerTy(DL);
853   if (Subtarget.hasStdExtV() &&
854       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
855     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
856   return VT.changeVectorElementTypeToInteger();
857 }
858 
// The explicit vector length (EVL) operand of VP intrinsics is always an
// XLEN-sized scalar on RISC-V.
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}
862 
// Describe the memory behavior of target intrinsics that touch memory so the
// backend can attach an accurate MachineMemOperand. Returns true only for the
// intrinsics handled here; all others are reported as non-memory.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    // Each of these intrinsics read-modify-writes the location named by its
    // first (pointer) argument; the memory VT is taken from the pointee type.
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // Marked volatile so the atomic access is neither reordered nor removed.
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}
890 
891 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
892                                                 const AddrMode &AM, Type *Ty,
893                                                 unsigned AS,
894                                                 Instruction *I) const {
895   // No global is ever allowed as a base.
896   if (AM.BaseGV)
897     return false;
898 
899   // Require a 12-bit signed offset.
900   if (!isInt<12>(AM.BaseOffs))
901     return false;
902 
903   switch (AM.Scale) {
904   case 0: // "r+i" or just "i", depending on HasBaseReg.
905     break;
906   case 1:
907     if (!AM.HasBaseReg) // allow "r+i".
908       break;
909     return false; // disallow "r+r" or "r+r+i".
910   default:
911     return false;
912   }
913 
914   return true;
915 }
916 
917 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
918   return isInt<12>(Imm);
919 }
920 
921 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
922   return isInt<12>(Imm);
923 }
924 
925 // On RV32, 64-bit integers are split into their high and low parts and held
926 // in two different registers, so the trunc is free since the low register can
927 // just be used.
928 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
929   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
930     return false;
931   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
932   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
933   return (SrcBits == 64 && DestBits == 32);
934 }
935 
936 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
937   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
938       !SrcVT.isInteger() || !DstVT.isInteger())
939     return false;
940   unsigned SrcBits = SrcVT.getSizeInBits();
941   unsigned DestBits = DstVT.getSizeInBits();
942   return (SrcBits == 64 && DestBits == 32);
943 }
944 
945 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
946   // Zexts are free if they can be combined with a load.
947   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
948     EVT MemVT = LD->getMemoryVT();
949     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
950          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
951         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
952          LD->getExtensionType() == ISD::ZEXTLOAD))
953       return true;
954   }
955 
956   return TargetLowering::isZExtFree(Val, VT2);
957 }
958 
959 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
960   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
961 }
962 
// Speculating cttz is only cheap when the Zbb extension is available.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}
966 
// Speculating ctlz is only cheap when the Zbb extension is available.
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}
970 
971 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
972                                        bool ForCodeSize) const {
973   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
974     return false;
975   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
976     return false;
977   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
978     return false;
979   if (Imm.isNegZero())
980     return false;
981   return Imm.isZero();
982 }
983 
984 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
985   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
986          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
987          (VT == MVT::f64 && Subtarget.hasStdExtD());
988 }
989 
990 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
991                                                       CallingConv::ID CC,
992                                                       EVT VT) const {
993   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
994   // end up using a GPR but that will be decided based on ABI.
995   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
996     return MVT::f32;
997 
998   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
999 }
1000 
1001 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1002                                                            CallingConv::ID CC,
1003                                                            EVT VT) const {
1004   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1005   // end up using a GPR but that will be decided based on ABI.
1006   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1007     return 1;
1008 
1009   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1010 }
1011 
1012 // Changes the condition code and swaps operands if necessary, so the SetCC
1013 // operation matches one of the comparisons supported directly by branches
1014 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1015 // with 1/-1.
1016 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1017                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1018   // Convert X > -1 to X >= 0.
1019   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1020     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1021     CC = ISD::SETGE;
1022     return;
1023   }
1024   // Convert X < 1 to 0 >= X.
1025   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1026     RHS = LHS;
1027     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1028     CC = ISD::SETGE;
1029     return;
1030   }
1031 
1032   switch (CC) {
1033   default:
1034     break;
1035   case ISD::SETGT:
1036   case ISD::SETLE:
1037   case ISD::SETUGT:
1038   case ISD::SETULE:
1039     CC = ISD::getSetCCSwappedOperands(CC);
1040     std::swap(LHS, RHS);
1041     break;
1042   }
1043 }
1044 
// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch); any other code is a lowering bug.
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}
1066 
// Map a scalable vector type onto its RVV register-group multiplier (LMUL),
// keyed on the type's known-minimum size in bits: 64 bits corresponds to
// LMUL=1, smaller sizes to fractional LMULs and larger ones to grouped
// registers.
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  // i1 mask vectors take one bit per element; scale to the size an i8 vector
  // of the same element count would have so the switch below applies.
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  }
}
1092 
// Return the register class that holds a vector of the given LMUL.
// Fractional LMULs and LMUL=1 all fit in a single vector register (VR);
// grouped LMULs use the corresponding VRM2/VRM4/VRM8 tuple classes.
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}
1110 
// Return the subregister index for the Index'th part of type VT inside a
// larger register group. The index family depends on VT's LMUL; the
// static_asserts guarantee the per-family indices are contiguous so simple
// addition of Index is valid.
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVII::VLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
      LMUL == RISCVII::VLMUL::LMUL_F4 ||
      LMUL == RISCVII::VLMUL::LMUL_F2 ||
      LMUL == RISCVII::VLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  // LMUL_8 has no enclosing register group, so no subreg index exists.
  llvm_unreachable("Invalid vector type.");
}
1133 
1134 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1135   if (VT.getVectorElementType() == MVT::i1)
1136     return RISCV::VRRegClassID;
1137   return getRegClassIDForLMUL(getLMUL(VT));
1138 }
1139 
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on the register class IDs decreasing as LMUL does.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the type and decide whether the element index falls in the
      // upper or lower half; that choice selects the subregister.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // Re-base the index into the chosen half for the next iteration.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
1175 
1176 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1177 // stores for those types.
1178 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1179   return !Subtarget.useRVVForFixedLengthVectors() ||
1180          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1181 }
1182 
// Decide whether the fixed-length vector type VT should be lowered via RVV.
// Checks overall size limits, element-type support (including required FP
// extensions), the resulting LMUL, and a power-of-two restriction.
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    // Scale MinVLen so the LMul computation below treats each mask bit as
    // if it occupied a byte.
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  // How many vector registers (register groups) the type would occupy.
  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
1240 
// Public wrapper around the file-local predicate of the same name,
// supplying this lowering's subtarget.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
1244 
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are set up, hence the two-pronged
  // assertion.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types, but we can't have a fractional LMUL with denominator
    // less than 64/SEW.
    unsigned NumElts =
        divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
1276 
// Convenience overload taking a SelectionDAG instead of a TargetLowering.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}
1282 
// Member-function entry point delegating to the file-local helper.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
1286 
1287 // Grow V to consume an entire RVV register.
1288 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1289                                        const RISCVSubtarget &Subtarget) {
1290   assert(VT.isScalableVector() &&
1291          "Expected to convert into a scalable vector!");
1292   assert(V.getValueType().isFixedLengthVector() &&
1293          "Expected a fixed length vector operand!");
1294   SDLoc DL(V);
1295   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1296   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1297 }
1298 
1299 // Shrink V so it's just big enough to maintain a VT's worth of data.
1300 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1301                                          const RISCVSubtarget &Subtarget) {
1302   assert(VT.isFixedLengthVector() &&
1303          "Expected to convert into a fixed length vector!");
1304   assert(V.getValueType().isScalableVector() &&
1305          "Expected a scalable vector operand!");
1306   SDLoc DL(V);
1307   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1308   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1309 }
1310 
1311 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1312 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1313 // the vector type that it is contained in.
1314 static std::pair<SDValue, SDValue>
1315 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1316                 const RISCVSubtarget &Subtarget) {
1317   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1318   MVT XLenVT = Subtarget.getXLenVT();
1319   SDValue VL = VecVT.isFixedLengthVector()
1320                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1321                    : DAG.getRegister(RISCV::X0, XLenVT);
1322   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1323   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1324   return {Mask, VL};
1325 }
1326 
// As above but assuming the given type is a scalable vector type, so the
// type serves as its own container.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
1334 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally false: see the rationale above.
  return false;
}
1347 
1348 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1349   // Only splats are currently supported.
1350   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1351     return true;
1352 
1353   return false;
1354 }
1355 
1356 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1357                                  const RISCVSubtarget &Subtarget) {
1358   MVT VT = Op.getSimpleValueType();
1359   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1360 
1361   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1362 
1363   SDLoc DL(Op);
1364   SDValue Mask, VL;
1365   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1366 
1367   unsigned Opc =
1368       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1369   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1370   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1371 }
1372 
1373 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1374                                  const RISCVSubtarget &Subtarget) {
1375   MVT VT = Op.getSimpleValueType();
1376   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1377 
1378   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1379 
1380   SDLoc DL(Op);
1381   SDValue Mask, VL;
1382   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1383 
1384   MVT XLenVT = Subtarget.getXLenVT();
1385   unsigned NumElts = Op.getNumOperands();
1386 
1387   if (VT.getVectorElementType() == MVT::i1) {
1388     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1389       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1390       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1391     }
1392 
1393     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1394       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1395       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1396     }
1397 
1398     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1399     // scalar integer chunks whose bit-width depends on the number of mask
1400     // bits and XLEN.
1401     // First, determine the most appropriate scalar integer type to use. This
1402     // is at most XLenVT, but may be shrunk to a smaller vector element type
1403     // according to the size of the final vector - use i8 chunks rather than
1404     // XLenVT if we're producing a v8i1. This results in more consistent
1405     // codegen across RV32 and RV64.
1406     unsigned NumViaIntegerBits =
1407         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1408     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1409       // If we have to use more than one INSERT_VECTOR_ELT then this
1410       // optimization is likely to increase code size; avoid peforming it in
1411       // such a case. We can use a load from a constant pool in this case.
1412       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1413         return SDValue();
1414       // Now we can create our integer vector type. Note that it may be larger
1415       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1416       MVT IntegerViaVecVT =
1417           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1418                            divideCeil(NumElts, NumViaIntegerBits));
1419 
1420       uint64_t Bits = 0;
1421       unsigned BitPos = 0, IntegerEltIdx = 0;
1422       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1423 
1424       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1425         // Once we accumulate enough bits to fill our scalar type, insert into
1426         // our vector and clear our accumulated data.
1427         if (I != 0 && I % NumViaIntegerBits == 0) {
1428           if (NumViaIntegerBits <= 32)
1429             Bits = SignExtend64(Bits, 32);
1430           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1431           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1432                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1433           Bits = 0;
1434           BitPos = 0;
1435           IntegerEltIdx++;
1436         }
1437         SDValue V = Op.getOperand(I);
1438         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1439         Bits |= ((uint64_t)BitValue << BitPos);
1440       }
1441 
1442       // Insert the (remaining) scalar value into position in our integer
1443       // vector type.
1444       if (NumViaIntegerBits <= 32)
1445         Bits = SignExtend64(Bits, 32);
1446       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1447       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1448                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1449 
1450       if (NumElts < NumViaIntegerBits) {
1451         // If we're producing a smaller vector than our minimum legal integer
1452         // type, bitcast to the equivalent (known-legal) mask type, and extract
1453         // our final mask.
1454         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1455         Vec = DAG.getBitcast(MVT::v8i1, Vec);
1456         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1457                           DAG.getConstant(0, DL, XLenVT));
1458       } else {
1459         // Else we must have produced an integer type with the same size as the
1460         // mask type; bitcast for the final result.
1461         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1462         Vec = DAG.getBitcast(VT, Vec);
1463       }
1464 
1465       return Vec;
1466     }
1467 
1468     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1469     // vector type, we have a legal equivalently-sized i8 type, so we can use
1470     // that.
1471     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1472     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1473 
1474     SDValue WideVec;
1475     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1476       // For a splat, perform a scalar truncate before creating the wider
1477       // vector.
1478       assert(Splat.getValueType() == XLenVT &&
1479              "Unexpected type for i1 splat value");
1480       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1481                           DAG.getConstant(1, DL, XLenVT));
1482       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1483     } else {
1484       SmallVector<SDValue, 8> Ops(Op->op_values());
1485       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1486       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1487       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1488     }
1489 
1490     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1491   }
1492 
1493   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1494     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1495                                         : RISCVISD::VMV_V_X_VL;
1496     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1497     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1498   }
1499 
1500   // Try and match an index sequence, which we can lower directly to the vid
1501   // instruction. An all-undef vector is matched by getSplatValue, above.
1502   if (VT.isInteger()) {
1503     bool IsVID = true;
1504     for (unsigned I = 0; I < NumElts && IsVID; I++)
1505       IsVID &= Op.getOperand(I).isUndef() ||
1506                (isa<ConstantSDNode>(Op.getOperand(I)) &&
1507                 Op.getConstantOperandVal(I) == I);
1508 
1509     if (IsVID) {
1510       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1511       return convertFromScalableVector(VT, VID, DAG, Subtarget);
1512     }
1513   }
1514 
1515   // Attempt to detect "hidden" splats, which only reveal themselves as splats
1516   // when re-interpreted as a vector with a larger element type. For example,
1517   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1518   // could be instead splat as
1519   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
1520   // TODO: This optimization could also work on non-constant splats, but it
1521   // would require bit-manipulation instructions to construct the splat value.
1522   SmallVector<SDValue> Sequence;
1523   unsigned EltBitSize = VT.getScalarSizeInBits();
1524   const auto *BV = cast<BuildVectorSDNode>(Op);
1525   if (VT.isInteger() && EltBitSize < 64 &&
1526       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
1527       BV->getRepeatedSequence(Sequence) &&
1528       (Sequence.size() * EltBitSize) <= 64) {
1529     unsigned SeqLen = Sequence.size();
1530     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1531     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1532     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1533             ViaIntVT == MVT::i64) &&
1534            "Unexpected sequence type");
1535 
1536     unsigned EltIdx = 0;
1537     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1538     uint64_t SplatValue = 0;
1539     // Construct the amalgamated value which can be splatted as this larger
1540     // vector type.
1541     for (const auto &SeqV : Sequence) {
1542       if (!SeqV.isUndef())
1543         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1544                        << (EltIdx * EltBitSize));
1545       EltIdx++;
1546     }
1547 
    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
1550     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1551       SplatValue = SignExtend64(SplatValue, 32);
1552 
1553     // Since we can't introduce illegal i64 types at this stage, we can only
1554     // perform an i64 splat on RV32 if it is its own sign-extended value. That
1555     // way we can use RVV instructions to splat.
1556     assert((ViaIntVT.bitsLE(XLenVT) ||
1557             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1558            "Unexpected bitcast sequence");
1559     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1560       SDValue ViaVL =
1561           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1562       MVT ViaContainerVT =
1563           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1564       SDValue Splat =
1565           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1566                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1567       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1568       return DAG.getBitcast(VT, Splat);
1569     }
1570   }
1571 
1572   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1573   // which constitute a large proportion of the elements. In such cases we can
1574   // splat a vector with the dominant element and make up the shortfall with
1575   // INSERT_VECTOR_ELTs.
1576   // Note that this includes vectors of 2 elements by association. The
1577   // upper-most element is the "dominant" one, allowing us to use a splat to
1578   // "insert" the upper element, and an insert of the lower element at position
1579   // 0, which improves codegen.
1580   SDValue DominantValue;
1581   unsigned MostCommonCount = 0;
1582   DenseMap<SDValue, unsigned> ValueCounts;
1583   unsigned NumUndefElts =
1584       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1585 
1586   for (SDValue V : Op->op_values()) {
1587     if (V.isUndef())
1588       continue;
1589 
1590     ValueCounts.insert(std::make_pair(V, 0));
1591     unsigned &Count = ValueCounts[V];
1592 
1593     // Is this value dominant? In case of a tie, prefer the highest element as
1594     // it's cheaper to insert near the beginning of a vector than it is at the
1595     // end.
1596     if (++Count >= MostCommonCount) {
1597       DominantValue = V;
1598       MostCommonCount = Count;
1599     }
1600   }
1601 
1602   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1603   unsigned NumDefElts = NumElts - NumUndefElts;
1604   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1605 
1606   // Don't perform this optimization when optimizing for size, since
1607   // materializing elements and inserting them tends to cause code bloat.
1608   if (!DAG.shouldOptForSize() &&
1609       ((MostCommonCount > DominantValueCountThreshold) ||
1610        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1611     // Start by splatting the most common element.
1612     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1613 
1614     DenseSet<SDValue> Processed{DominantValue};
1615     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1616     for (const auto &OpIdx : enumerate(Op->ops())) {
1617       const SDValue &V = OpIdx.value();
1618       if (V.isUndef() || !Processed.insert(V).second)
1619         continue;
1620       if (ValueCounts[V] == 1) {
1621         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1622                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
1623       } else {
1624         // Blend in all instances of this value using a VSELECT, using a
1625         // mask where each bit signals whether that element is the one
1626         // we're after.
1627         SmallVector<SDValue> Ops;
1628         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1629           return DAG.getConstant(V == V1, DL, XLenVT);
1630         });
1631         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1632                           DAG.getBuildVector(SelMaskTy, DL, Ops),
1633                           DAG.getSplatBuildVector(VT, DL, V), Vec);
1634       }
1635     }
1636 
1637     return Vec;
1638   }
1639 
1640   return SDValue();
1641 }
1642 
1643 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
1644                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1645   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1646     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1647     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1648     // If Hi constant is all the same sign bit as Lo, lower this as a custom
1649     // node in order to try and match RVV vector/scalar instructions.
1650     if ((LoC >> 31) == HiC)
1651       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1652   }
1653 
1654   // Fall back to a stack store and stride x0 vector load.
1655   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
1656 }
1657 
1658 // Called by type legalization to handle splat of i64 on RV32.
1659 // FIXME: We can optimize this when the type has sign or zero bits in one
1660 // of the halves.
1661 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1662                                    SDValue VL, SelectionDAG &DAG) {
1663   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
1664   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1665                            DAG.getConstant(0, DL, MVT::i32));
1666   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1667                            DAG.getConstant(1, DL, MVT::i32));
1668   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
1669 }
1670 
1671 // This function lowers a splat of a scalar operand Splat with the vector
1672 // length VL. It ensures the final sequence is type legal, which is useful when
1673 // lowering a splat after type legalization.
1674 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
1675                                 SelectionDAG &DAG,
1676                                 const RISCVSubtarget &Subtarget) {
1677   if (VT.isFloatingPoint())
1678     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
1679 
1680   MVT XLenVT = Subtarget.getXLenVT();
1681 
1682   // Simplest case is that the operand needs to be promoted to XLenVT.
1683   if (Scalar.getValueType().bitsLE(XLenVT)) {
1684     // If the operand is a constant, sign extend to increase our chances
1685     // of being able to use a .vi instruction. ANY_EXTEND would become a
1686     // a zero extend and the simm5 check in isel would fail.
1687     // FIXME: Should we ignore the upper bits in isel instead?
1688     unsigned ExtOpc =
1689         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
1690     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
1691     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
1692   }
1693 
1694   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
1695          "Unexpected scalar for splat lowering!");
1696 
1697   // Otherwise use the more complicated splatting algorithm.
1698   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
1699 }
1700 
// Lower ISD::VECTOR_SHUFFLE for fixed-length vectors. Splat shuffles are
// matched to a strided/scalar load-and-splat or a vrgather.vx; select-like
// shuffles become a VSELECT; everything else is lowered via one or two
// vrgather.vv operations blended together under a select mask.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  // All-true mask and vector-length operands for the container type.
  SDValue TrueMask, VL;
  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Lower splat shuffles (every element taken from a single lane of V1).
  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        // Scale the lane offset from elements to bytes.
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                                   TypeSize::Fixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT), VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          // Keep the new load correctly ordered relative to the original.
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      // Not a splatted load: broadcast the lane with a register gather
      // (vrgather.vx).
      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vectors.
  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : SVN->getMask()) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHS = MaskIndex < (int)NumElts;
      // For "undef" elements of -1, shuffle in element 0 instead.
      GatherIndicesLHS.push_back(
          DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
      // TODO: If we're masking out unused elements anyway, it might produce
      // better code if we use the most-common element index instead of 0.
      GatherIndicesRHS.push_back(
          DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
    }
  }

  // Apply the operand swap decided above; the gather indices swap with it.
  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  // Gather the LHS elements; a splat of V1 avoids the vrgather entirely.
  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
    LHSIndices =
        convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    Gather =
        DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
    RHSIndices =
        convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);

    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
    V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
    Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
                         Gather, VL);
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
1893 
1894 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
1895                                      SDLoc DL, SelectionDAG &DAG,
1896                                      const RISCVSubtarget &Subtarget) {
1897   if (VT.isScalableVector())
1898     return DAG.getFPExtendOrRound(Op, DL, VT);
1899   assert(VT.isFixedLengthVector() &&
1900          "Unexpected value type for RVV FP extend/round lowering");
1901   SDValue Mask, VL;
1902   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1903   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
1904                         ? RISCVISD::FP_EXTEND_VL
1905                         : RISCVISD::FP_ROUND_VL;
1906   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1907 }
1908 
1909 // While RVV has alignment restrictions, we should always be able to load as a
1910 // legal equivalently-sized byte-typed vector instead. This method is
1911 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
1912 // the load is already correctly-aligned, it returns SDValue().
1913 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
1914                                                     SelectionDAG &DAG) const {
1915   auto *Load = cast<LoadSDNode>(Op);
1916   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
1917 
1918   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
1919                                      Load->getMemoryVT(),
1920                                      *Load->getMemOperand()))
1921     return SDValue();
1922 
1923   SDLoc DL(Op);
1924   MVT VT = Op.getSimpleValueType();
1925   unsigned EltSizeBits = VT.getScalarSizeInBits();
1926   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
1927          "Unexpected unaligned RVV load type");
1928   MVT NewVT =
1929       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
1930   assert(NewVT.isValid() &&
1931          "Expecting equally-sized RVV vector types to be legal");
1932   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
1933                           Load->getPointerInfo(), Load->getOriginalAlign(),
1934                           Load->getMemOperand()->getFlags());
1935   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
1936 }
1937 
1938 // While RVV has alignment restrictions, we should always be able to store as a
1939 // legal equivalently-sized byte-typed vector instead. This method is
1940 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
1941 // returns SDValue() if the store is already correctly aligned.
1942 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
1943                                                      SelectionDAG &DAG) const {
1944   auto *Store = cast<StoreSDNode>(Op);
1945   assert(Store && Store->getValue().getValueType().isVector() &&
1946          "Expected vector store");
1947 
1948   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
1949                                      Store->getMemoryVT(),
1950                                      *Store->getMemOperand()))
1951     return SDValue();
1952 
1953   SDLoc DL(Op);
1954   SDValue StoredVal = Store->getValue();
1955   MVT VT = StoredVal.getSimpleValueType();
1956   unsigned EltSizeBits = VT.getScalarSizeInBits();
1957   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
1958          "Unexpected unaligned RVV store type");
1959   MVT NewVT =
1960       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
1961   assert(NewVT.isValid() &&
1962          "Expecting equally-sized RVV vector types to be legal");
1963   StoredVal = DAG.getBitcast(NewVT, StoredVal);
1964   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
1965                       Store->getPointerInfo(), Store->getOriginalAlign(),
1966                       Store->getMemOperand()->getFlags());
1967 }
1968 
1969 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1970                                             SelectionDAG &DAG) const {
1971   switch (Op.getOpcode()) {
1972   default:
1973     report_fatal_error("unimplemented operand");
1974   case ISD::GlobalAddress:
1975     return lowerGlobalAddress(Op, DAG);
1976   case ISD::BlockAddress:
1977     return lowerBlockAddress(Op, DAG);
1978   case ISD::ConstantPool:
1979     return lowerConstantPool(Op, DAG);
1980   case ISD::JumpTable:
1981     return lowerJumpTable(Op, DAG);
1982   case ISD::GlobalTLSAddress:
1983     return lowerGlobalTLSAddress(Op, DAG);
1984   case ISD::SELECT:
1985     return lowerSELECT(Op, DAG);
1986   case ISD::BRCOND:
1987     return lowerBRCOND(Op, DAG);
1988   case ISD::VASTART:
1989     return lowerVASTART(Op, DAG);
1990   case ISD::FRAMEADDR:
1991     return lowerFRAMEADDR(Op, DAG);
1992   case ISD::RETURNADDR:
1993     return lowerRETURNADDR(Op, DAG);
1994   case ISD::SHL_PARTS:
1995     return lowerShiftLeftParts(Op, DAG);
1996   case ISD::SRA_PARTS:
1997     return lowerShiftRightParts(Op, DAG, true);
1998   case ISD::SRL_PARTS:
1999     return lowerShiftRightParts(Op, DAG, false);
2000   case ISD::BITCAST: {
2001     SDLoc DL(Op);
2002     EVT VT = Op.getValueType();
2003     SDValue Op0 = Op.getOperand(0);
2004     EVT Op0VT = Op0.getValueType();
2005     MVT XLenVT = Subtarget.getXLenVT();
2006     if (VT.isFixedLengthVector()) {
2007       // We can handle fixed length vector bitcasts with a simple replacement
2008       // in isel.
2009       if (Op0VT.isFixedLengthVector())
2010         return Op;
2011       // When bitcasting from scalar to fixed-length vector, insert the scalar
2012       // into a one-element vector of the result type, and perform a vector
2013       // bitcast.
2014       if (!Op0VT.isVector()) {
2015         auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2016         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2017                                               DAG.getUNDEF(BVT), Op0,
2018                                               DAG.getConstant(0, DL, XLenVT)));
2019       }
2020       return SDValue();
2021     }
2022     // Custom-legalize bitcasts from fixed-length vector types to scalar types
2023     // thus: bitcast the vector to a one-element vector type whose element type
2024     // is the same as the result type, and extract the first element.
2025     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2026       LLVMContext &Context = *DAG.getContext();
2027       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
2028       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2029                          DAG.getConstant(0, DL, XLenVT));
2030     }
2031     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2032       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2033       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2034       return FPConv;
2035     }
2036     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2037         Subtarget.hasStdExtF()) {
2038       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2039       SDValue FPConv =
2040           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2041       return FPConv;
2042     }
2043     return SDValue();
2044   }
2045   case ISD::INTRINSIC_WO_CHAIN:
2046     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2047   case ISD::INTRINSIC_W_CHAIN:
2048     return LowerINTRINSIC_W_CHAIN(Op, DAG);
2049   case ISD::BSWAP:
2050   case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2052     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2053     MVT VT = Op.getSimpleValueType();
2054     SDLoc DL(Op);
2055     // Start with the maximum immediate value which is the bitwidth - 1.
2056     unsigned Imm = VT.getSizeInBits() - 1;
2057     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2058     if (Op.getOpcode() == ISD::BSWAP)
2059       Imm &= ~0x7U;
2060     return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2061                        DAG.getConstant(Imm, DL, VT));
2062   }
2063   case ISD::FSHL:
2064   case ISD::FSHR: {
2065     MVT VT = Op.getSimpleValueType();
2066     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2067     SDLoc DL(Op);
2068     if (Op.getOperand(2).getOpcode() == ISD::Constant)
2069       return Op;
2070     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2071     // use log(XLen) bits. Mask the shift amount accordingly.
2072     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2073     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2074                                 DAG.getConstant(ShAmtWidth, DL, VT));
2075     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
2076     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
2077   }
2078   case ISD::TRUNCATE: {
2079     SDLoc DL(Op);
2080     MVT VT = Op.getSimpleValueType();
2081     // Only custom-lower vector truncates
2082     if (!VT.isVector())
2083       return Op;
2084 
2085     // Truncates to mask types are handled differently
2086     if (VT.getVectorElementType() == MVT::i1)
2087       return lowerVectorMaskTrunc(Op, DAG);
2088 
2089     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2090     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2091     // truncate by one power of two at a time.
2092     MVT DstEltVT = VT.getVectorElementType();
2093 
2094     SDValue Src = Op.getOperand(0);
2095     MVT SrcVT = Src.getSimpleValueType();
2096     MVT SrcEltVT = SrcVT.getVectorElementType();
2097 
2098     assert(DstEltVT.bitsLT(SrcEltVT) &&
2099            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2100            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2101            "Unexpected vector truncate lowering");
2102 
2103     MVT ContainerVT = SrcVT;
2104     if (SrcVT.isFixedLengthVector()) {
2105       ContainerVT = getContainerForFixedLengthVector(SrcVT);
2106       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2107     }
2108 
2109     SDValue Result = Src;
2110     SDValue Mask, VL;
2111     std::tie(Mask, VL) =
2112         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2113     LLVMContext &Context = *DAG.getContext();
2114     const ElementCount Count = ContainerVT.getVectorElementCount();
2115     do {
2116       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2117       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2118       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2119                            Mask, VL);
2120     } while (SrcEltVT != DstEltVT);
2121 
2122     if (SrcVT.isFixedLengthVector())
2123       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2124 
2125     return Result;
2126   }
2127   case ISD::ANY_EXTEND:
2128   case ISD::ZERO_EXTEND:
2129     if (Op.getOperand(0).getValueType().isVector() &&
2130         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2131       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2132     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2133   case ISD::SIGN_EXTEND:
2134     if (Op.getOperand(0).getValueType().isVector() &&
2135         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2136       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2137     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2138   case ISD::SPLAT_VECTOR_PARTS:
2139     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2140   case ISD::INSERT_VECTOR_ELT:
2141     return lowerINSERT_VECTOR_ELT(Op, DAG);
2142   case ISD::EXTRACT_VECTOR_ELT:
2143     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2144   case ISD::VSCALE: {
2145     MVT VT = Op.getSimpleValueType();
2146     SDLoc DL(Op);
2147     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
2148     // We define our scalable vector types for lmul=1 to use a 64 bit known
2149     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
2150     // vscale as VLENB / 8.
2151     assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2152     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2153                                  DAG.getConstant(3, DL, VT));
2154     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2155   }
2156   case ISD::FP_EXTEND: {
2157     // RVV can only do fp_extend to types double the size as the source. We
2158     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2159     // via f32.
2160     SDLoc DL(Op);
2161     MVT VT = Op.getSimpleValueType();
2162     SDValue Src = Op.getOperand(0);
2163     MVT SrcVT = Src.getSimpleValueType();
2164 
2165     // Prepare any fixed-length vector operands.
2166     MVT ContainerVT = VT;
2167     if (SrcVT.isFixedLengthVector()) {
2168       ContainerVT = getContainerForFixedLengthVector(VT);
2169       MVT SrcContainerVT =
2170           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2171       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2172     }
2173 
2174     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2175         SrcVT.getVectorElementType() != MVT::f16) {
2176       // For scalable vectors, we only need to close the gap between
2177       // vXf16->vXf64.
2178       if (!VT.isFixedLengthVector())
2179         return Op;
2180       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2181       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2182       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2183     }
2184 
2185     MVT InterVT = VT.changeVectorElementType(MVT::f32);
2186     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2187     SDValue IntermediateExtend = getRVVFPExtendOrRound(
2188         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2189 
2190     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2191                                            DL, DAG, Subtarget);
2192     if (VT.isFixedLengthVector())
2193       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2194     return Extend;
2195   }
2196   case ISD::FP_ROUND: {
2197     // RVV can only do fp_round to types half the size as the source. We
2198     // custom-lower f64->f16 rounds via RVV's round-to-odd float
2199     // conversion instruction.
2200     SDLoc DL(Op);
2201     MVT VT = Op.getSimpleValueType();
2202     SDValue Src = Op.getOperand(0);
2203     MVT SrcVT = Src.getSimpleValueType();
2204 
2205     // Prepare any fixed-length vector operands.
2206     MVT ContainerVT = VT;
2207     if (VT.isFixedLengthVector()) {
2208       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2209       ContainerVT =
2210           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2211       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2212     }
2213 
2214     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2215         SrcVT.getVectorElementType() != MVT::f64) {
2216       // For scalable vectors, we only need to close the gap between
2217       // vXf64<->vXf16.
2218       if (!VT.isFixedLengthVector())
2219         return Op;
2220       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2221       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2222       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2223     }
2224 
2225     SDValue Mask, VL;
2226     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2227 
2228     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2229     SDValue IntermediateRound =
2230         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2231     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2232                                           DL, DAG, Subtarget);
2233 
2234     if (VT.isFixedLengthVector())
2235       return convertFromScalableVector(VT, Round, DAG, Subtarget);
2236     return Round;
2237   }
2238   case ISD::FP_TO_SINT:
2239   case ISD::FP_TO_UINT:
2240   case ISD::SINT_TO_FP:
2241   case ISD::UINT_TO_FP: {
2242     // RVV can only do fp<->int conversions to types half/double the size as
2243     // the source. We custom-lower any conversions that do two hops into
2244     // sequences.
2245     MVT VT = Op.getSimpleValueType();
2246     if (!VT.isVector())
2247       return Op;
2248     SDLoc DL(Op);
2249     SDValue Src = Op.getOperand(0);
2250     MVT EltVT = VT.getVectorElementType();
2251     MVT SrcVT = Src.getSimpleValueType();
2252     MVT SrcEltVT = SrcVT.getVectorElementType();
2253     unsigned EltSize = EltVT.getSizeInBits();
2254     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2255     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2256            "Unexpected vector element types");
2257 
2258     bool IsInt2FP = SrcEltVT.isInteger();
2259     // Widening conversions
2260     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2261       if (IsInt2FP) {
2262         // Do a regular integer sign/zero extension then convert to float.
2263         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
2264                                       VT.getVectorElementCount());
2265         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2266                                  ? ISD::ZERO_EXTEND
2267                                  : ISD::SIGN_EXTEND;
2268         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2269         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2270       }
2271       // FP2Int
2272       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2273       // Do one doubling fp_extend then complete the operation by converting
2274       // to int.
2275       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2276       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2277       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2278     }
2279 
2280     // Narrowing conversions
2281     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2282       if (IsInt2FP) {
2283         // One narrowing int_to_fp, then an fp_round.
2284         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2285         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2286         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2287         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2288       }
2289       // FP2Int
2290       // One narrowing fp_to_int, then truncate the integer. If the float isn't
2291       // representable by the integer, the result is poison.
2292       MVT IVecVT =
2293           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
2294                            VT.getVectorElementCount());
2295       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2296       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2297     }
2298 
2299     // Scalable vectors can exit here. Patterns will handle equally-sized
2300     // conversions halving/doubling ones.
2301     if (!VT.isFixedLengthVector())
2302       return Op;
2303 
2304     // For fixed-length vectors we lower to a custom "VL" node.
2305     unsigned RVVOpc = 0;
2306     switch (Op.getOpcode()) {
2307     default:
2308       llvm_unreachable("Impossible opcode");
2309     case ISD::FP_TO_SINT:
2310       RVVOpc = RISCVISD::FP_TO_SINT_VL;
2311       break;
2312     case ISD::FP_TO_UINT:
2313       RVVOpc = RISCVISD::FP_TO_UINT_VL;
2314       break;
2315     case ISD::SINT_TO_FP:
2316       RVVOpc = RISCVISD::SINT_TO_FP_VL;
2317       break;
2318     case ISD::UINT_TO_FP:
2319       RVVOpc = RISCVISD::UINT_TO_FP_VL;
2320       break;
2321     }
2322 
2323     MVT ContainerVT, SrcContainerVT;
2324     // Derive the reference container type from the larger vector type.
2325     if (SrcEltSize > EltSize) {
2326       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2327       ContainerVT =
2328           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2329     } else {
2330       ContainerVT = getContainerForFixedLengthVector(VT);
2331       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2332     }
2333 
2334     SDValue Mask, VL;
2335     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2336 
2337     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2338     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2339     return convertFromScalableVector(VT, Src, DAG, Subtarget);
2340   }
2341   case ISD::VECREDUCE_ADD:
2342   case ISD::VECREDUCE_UMAX:
2343   case ISD::VECREDUCE_SMAX:
2344   case ISD::VECREDUCE_UMIN:
2345   case ISD::VECREDUCE_SMIN:
2346     return lowerVECREDUCE(Op, DAG);
2347   case ISD::VECREDUCE_AND:
2348   case ISD::VECREDUCE_OR:
2349   case ISD::VECREDUCE_XOR:
2350     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2351       return lowerVectorMaskVECREDUCE(Op, DAG);
2352     return lowerVECREDUCE(Op, DAG);
2353   case ISD::VECREDUCE_FADD:
2354   case ISD::VECREDUCE_SEQ_FADD:
2355   case ISD::VECREDUCE_FMIN:
2356   case ISD::VECREDUCE_FMAX:
2357     return lowerFPVECREDUCE(Op, DAG);
2358   case ISD::INSERT_SUBVECTOR:
2359     return lowerINSERT_SUBVECTOR(Op, DAG);
2360   case ISD::EXTRACT_SUBVECTOR:
2361     return lowerEXTRACT_SUBVECTOR(Op, DAG);
2362   case ISD::STEP_VECTOR:
2363     return lowerSTEP_VECTOR(Op, DAG);
2364   case ISD::VECTOR_REVERSE:
2365     return lowerVECTOR_REVERSE(Op, DAG);
2366   case ISD::BUILD_VECTOR:
2367     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2368   case ISD::SPLAT_VECTOR:
2369     if (Op.getValueType().getVectorElementType() == MVT::i1)
2370       return lowerVectorMaskSplat(Op, DAG);
2371     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2372   case ISD::VECTOR_SHUFFLE:
2373     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2374   case ISD::CONCAT_VECTORS: {
2375     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2376     // better than going through the stack, as the default expansion does.
2377     SDLoc DL(Op);
2378     MVT VT = Op.getSimpleValueType();
2379     unsigned NumOpElts =
2380         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
2381     SDValue Vec = DAG.getUNDEF(VT);
2382     for (const auto &OpIdx : enumerate(Op->ops()))
2383       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2384                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2385     return Vec;
2386   }
2387   case ISD::LOAD:
2388     if (auto V = expandUnalignedRVVLoad(Op, DAG))
2389       return V;
2390     if (Op.getValueType().isFixedLengthVector())
2391       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2392     return Op;
2393   case ISD::STORE:
2394     if (auto V = expandUnalignedRVVStore(Op, DAG))
2395       return V;
2396     if (Op.getOperand(1).getValueType().isFixedLengthVector())
2397       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2398     return Op;
2399   case ISD::MLOAD:
2400     return lowerMLOAD(Op, DAG);
2401   case ISD::MSTORE:
2402     return lowerMSTORE(Op, DAG);
2403   case ISD::SETCC:
2404     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2405   case ISD::ADD:
2406     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2407   case ISD::SUB:
2408     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2409   case ISD::MUL:
2410     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2411   case ISD::MULHS:
2412     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2413   case ISD::MULHU:
2414     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2415   case ISD::AND:
2416     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2417                                               RISCVISD::AND_VL);
2418   case ISD::OR:
2419     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2420                                               RISCVISD::OR_VL);
2421   case ISD::XOR:
2422     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2423                                               RISCVISD::XOR_VL);
2424   case ISD::SDIV:
2425     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2426   case ISD::SREM:
2427     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2428   case ISD::UDIV:
2429     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2430   case ISD::UREM:
2431     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2432   case ISD::SHL:
2433     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
2434   case ISD::SRA:
2435     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
2436   case ISD::SRL:
2437     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
2438   case ISD::FADD:
2439     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2440   case ISD::FSUB:
2441     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2442   case ISD::FMUL:
2443     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2444   case ISD::FDIV:
2445     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2446   case ISD::FNEG:
2447     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2448   case ISD::FABS:
2449     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2450   case ISD::FSQRT:
2451     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2452   case ISD::FMA:
2453     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2454   case ISD::SMIN:
2455     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2456   case ISD::SMAX:
2457     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2458   case ISD::UMIN:
2459     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2460   case ISD::UMAX:
2461     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2462   case ISD::FMINNUM:
2463     return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2464   case ISD::FMAXNUM:
2465     return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2466   case ISD::ABS:
2467     return lowerABS(Op, DAG);
2468   case ISD::VSELECT:
2469     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2470   case ISD::FCOPYSIGN:
2471     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2472   case ISD::MGATHER:
2473     return lowerMGATHER(Op, DAG);
2474   case ISD::MSCATTER:
2475     return lowerMSCATTER(Op, DAG);
2476   case ISD::FLT_ROUNDS_:
2477     return lowerGET_ROUNDING(Op, DAG);
2478   case ISD::SET_ROUNDING:
2479     return lowerSET_ROUNDING(Op, DAG);
2480   case ISD::VP_ADD:
2481     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2482   case ISD::VP_SUB:
2483     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2484   case ISD::VP_MUL:
2485     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2486   case ISD::VP_SDIV:
2487     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2488   case ISD::VP_UDIV:
2489     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2490   case ISD::VP_SREM:
2491     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2492   case ISD::VP_UREM:
2493     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2494   case ISD::VP_AND:
2495     return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2496   case ISD::VP_OR:
2497     return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2498   case ISD::VP_XOR:
2499     return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2500   case ISD::VP_ASHR:
2501     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2502   case ISD::VP_LSHR:
2503     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2504   case ISD::VP_SHL:
2505     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2506   case ISD::VP_FADD:
2507     return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
2508   case ISD::VP_FSUB:
2509     return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
2510   case ISD::VP_FMUL:
2511     return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
2512   case ISD::VP_FDIV:
2513     return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
2514   }
2515 }
2516 
// Build the target-specific node for a global address with the given
// relocation flags. The offset is deliberately 0: lowerGlobalAddress emits
// any global offset as a separate ADD node instead of folding it in here.
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
2521 
// Build the target-specific node for a block address. Unlike the global
// address overload, the node's own offset is carried through unchanged.
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
2527 
// Build the target-specific node for a constant-pool entry, preserving its
// alignment and offset.
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
2533 
// Build the target-specific node for a jump-table index.
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
2538 
// Materialize the address of a symbol-like node (global address, block
// address, constant pool entry or jump table) according to the relocation
// model and code model. \p IsLocal selects PC-relative (dso-local) versus
// GOT-indirect addressing when compiling position-independent code.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
2579 
2580 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2581                                                 SelectionDAG &DAG) const {
2582   SDLoc DL(Op);
2583   EVT Ty = Op.getValueType();
2584   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2585   int64_t Offset = N->getOffset();
2586   MVT XLenVT = Subtarget.getXLenVT();
2587 
2588   const GlobalValue *GV = N->getGlobal();
2589   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2590   SDValue Addr = getAddr(N, DAG, IsLocal);
2591 
2592   // In order to maximise the opportunity for common subexpression elimination,
2593   // emit a separate ADD node for the global address offset instead of folding
2594   // it in the global address node. Later peephole optimisations may choose to
2595   // fold it back in when profitable.
2596   if (Offset != 0)
2597     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2598                        DAG.getConstant(Offset, DL, XLenVT));
2599   return Addr;
2600 }
2601 
2602 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2603                                                SelectionDAG &DAG) const {
2604   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2605 
2606   return getAddr(N, DAG);
2607 }
2608 
2609 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2610                                                SelectionDAG &DAG) const {
2611   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2612 
2613   return getAddr(N, DAG);
2614 }
2615 
2616 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2617                                             SelectionDAG &DAG) const {
2618   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2619 
2620   return getAddr(N, DAG);
2621 }
2622 
// Lower a TLS global for the static models: initial-exec when \p UseGOT is
// true, local-exec otherwise. In both cases the result is the symbol's
// thread-local address formed relative to the thread pointer (tp/x4).
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
2663 
// Lower a TLS global for the general/local-dynamic models: materialize the
// GOT slot address for the symbol and then call __tls_get_addr on it. The
// call's return value is the thread-local address.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // The libcall argument is an XLEN-sized integer matching the pointer width.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
2695 
2696 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2697                                                    SelectionDAG &DAG) const {
2698   SDLoc DL(Op);
2699   EVT Ty = Op.getValueType();
2700   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2701   int64_t Offset = N->getOffset();
2702   MVT XLenVT = Subtarget.getXLenVT();
2703 
2704   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
2705 
2706   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2707       CallingConv::GHC)
2708     report_fatal_error("In GHC calling convention TLS is not supported");
2709 
2710   SDValue Addr;
2711   switch (Model) {
2712   case TLSModel::LocalExec:
2713     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2714     break;
2715   case TLSModel::InitialExec:
2716     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2717     break;
2718   case TLSModel::LocalDynamic:
2719   case TLSModel::GeneralDynamic:
2720     Addr = getDynamicTLSAddr(N, DAG);
2721     break;
2722   }
2723 
2724   // In order to maximise the opportunity for common subexpression elimination,
2725   // emit a separate ADD node for the global address offset instead of folding
2726   // it in the global address node. Later peephole optimisations may choose to
2727   // fold it back in when profitable.
2728   if (Offset != 0)
2729     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2730                        DAG.getConstant(Offset, DL, XLenVT));
2731   return Addr;
2732 }
2733 
// Lower ISD::SELECT to RISCVISD::SELECT_CC, folding a feeding SETCC into the
// node when profitable so the compare+branch expansion can use it directly.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to SETLT
    // case for now because that is what signed saturating add/sub need.
    // FIXME: We don't need the condition to be SETLT or even a SETCC,
    // but we would probably want to swap the true/false values if the condition
    // is SETGE/SETLE to avoid an XORI.
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
        CCVal == ISD::SETLT) {
      const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
      const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
      // CondV is 0 or 1, so (FalseV + CondV) selects TrueV when
      // TrueV == FalseV + 1, and (FalseV - CondV) when TrueV == FalseV - 1.
      if (TrueVal - 1 == FalseVal)
        return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
      if (TrueVal + 1 == FalseVal)
        return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
    }

    // May canonicalise the condition code and swap/rewrite LHS/RHS/CCVal
    // in place to match the branch instructions RISC-V actually has.
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
2788 
// Lower ISD::BRCOND to RISCVISD::BR_CC, folding a feeding SETCC into the
// branch when it compares XLenVT values; otherwise branch on (cond != 0).
SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  if (CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // May rewrite LHS/RHS/CCVal in place to a condition RISC-V branches
    // support directly.
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    // Operand 0 is the chain; operand 2 is the destination basic block.
    return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                       LHS, RHS, TargetCC, Op.getOperand(2));
  }

  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                     CondV, DAG.getConstant(0, DL, XLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}
2811 
2812 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2813   MachineFunction &MF = DAG.getMachineFunction();
2814   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
2815 
2816   SDLoc DL(Op);
2817   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2818                                  getPointerTy(MF.getDataLayout()));
2819 
2820   // vastart just stores the address of the VarArgsFrameIndex slot into the
2821   // memory location argument.
2822   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2823   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2824                       MachinePointerInfo(SV));
2825 }
2826 
2827 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
2828                                             SelectionDAG &DAG) const {
2829   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2830   MachineFunction &MF = DAG.getMachineFunction();
2831   MachineFrameInfo &MFI = MF.getFrameInfo();
2832   MFI.setFrameAddressIsTaken(true);
2833   Register FrameReg = RI.getFrameRegister(MF);
2834   int XLenInBytes = Subtarget.getXLen() / 8;
2835 
2836   EVT VT = Op.getValueType();
2837   SDLoc DL(Op);
2838   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2839   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2840   while (Depth--) {
2841     int Offset = -(XLenInBytes * 2);
2842     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2843                               DAG.getIntPtrConstant(Offset, DL));
2844     FrameAddr =
2845         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2846   }
2847   return FrameAddr;
2848 }
2849 
2850 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
2851                                              SelectionDAG &DAG) const {
2852   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2853   MachineFunction &MF = DAG.getMachineFunction();
2854   MachineFrameInfo &MFI = MF.getFrameInfo();
2855   MFI.setReturnAddressIsTaken(true);
2856   MVT XLenVT = Subtarget.getXLenVT();
2857   int XLenInBytes = Subtarget.getXLen() / 8;
2858 
2859   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2860     return SDValue();
2861 
2862   EVT VT = Op.getValueType();
2863   SDLoc DL(Op);
2864   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2865   if (Depth) {
2866     int Off = -XLenInBytes;
2867     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
2868     SDValue Offset = DAG.getConstant(Off, DL, VT);
2869     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
2870                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
2871                        MachinePointerInfo());
2872   }
2873 
2874   // Return the value of the return address register, marking it an implicit
2875   // live-in.
2876   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
2877   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
2878 }
2879 
// Lower SHL_PARTS: shift the 2*XLEN-wide value {Hi,Lo} left by a variable
// amount, selecting at runtime between the Shamt<XLEN and Shamt>=XLEN
// expansions described below.
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  // (Lo >>u 1) >>u (XLEN-1 - Shamt) rather than Lo >>u (XLEN - Shamt): the
  // two-step form keeps the shift amount in [0, XLEN-1] even when Shamt is 0.
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  // Select between the two expansions on Shamt < XLEN.
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
2918 
// Lower SRA_PARTS/SRL_PARTS: shift the 2*XLEN-wide value {Hi,Lo} right by a
// variable amount (arithmetic when IsSRA, logical otherwise), selecting at
// runtime between the Shamt<XLEN and Shamt>=XLEN expansions described below.
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  // The only difference between SRA and SRL is the kind of right shift used
  // on Hi, and the value Hi takes when Shamt >= XLEN.
  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  // (Hi << 1) << (XLEN-1 - Shamt) rather than Hi << (XLEN - Shamt): the
  // two-step form keeps the shift amount in [0, XLEN-1] even when Shamt is 0.
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  // For SRA the high part becomes all sign bits when Shamt >= XLEN.
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  // Select between the two expansions on Shamt < XLEN.
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
2970 
// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially: they map directly to
  // the vmset.m / vmclr.m mask instructions.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT XLenVT = Subtarget.getXLenVT();
  assert(SplatVal.getValueType() == XLenVT &&
         "Unexpected type for i1 splat value");
  // General case: keep only bit 0 of the scalar, splat it across an i8
  // vector, and compare against zero to produce the mask.
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
                         DAG.getConstant(1, DL, XLenVT));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
2997 
2998 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
2999 // illegal (currently only vXi64 RV32).
3000 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3001 // them to SPLAT_VECTOR_I64
3002 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3003                                                      SelectionDAG &DAG) const {
3004   SDLoc DL(Op);
3005   MVT VecVT = Op.getSimpleValueType();
3006   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3007          "Unexpected SPLAT_VECTOR_PARTS lowering");
3008 
3009   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3010   SDValue Lo = Op.getOperand(0);
3011   SDValue Hi = Op.getOperand(1);
3012 
3013   if (VecVT.isFixedLengthVector()) {
3014     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3015     SDLoc DL(Op);
3016     SDValue Mask, VL;
3017     std::tie(Mask, VL) =
3018         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3019 
3020     SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3021     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3022   }
3023 
3024   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3025     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3026     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3027     // If Hi constant is all the same sign bit as Lo, lower this as a custom
3028     // node in order to try and match RVV vector/scalar instructions.
3029     if ((LoC >> 31) == HiC)
3030       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3031   }
3032 
3033   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3034   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3035       isa<ConstantSDNode>(Hi.getOperand(1)) &&
3036       Hi.getConstantOperandVal(1) == 31)
3037     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3038 
3039   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
3040   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3041                      DAG.getRegister(RISCV::X0, MVT::i64));
3042 }
3043 
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  // Scalar values to be splatted across the result vector.
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length case: operate on the scalable container type with
  // VL-predicated nodes, then convert the result back.
  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
3098 
3099 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3100     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3101   MVT ExtVT = Op.getSimpleValueType();
3102   // Only custom-lower extensions from fixed-length vector types.
3103   if (!ExtVT.isFixedLengthVector())
3104     return Op;
3105   MVT VT = Op.getOperand(0).getSimpleValueType();
3106   // Grab the canonical container type for the extended type. Infer the smaller
3107   // type from that to ensure the same number of vector elements, as we know
3108   // the LMUL will be sufficient to hold the smaller type.
3109   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
3110   // Get the extended container type manually to ensure the same number of
3111   // vector elements between source and dest.
3112   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3113                                      ContainerExtVT.getVectorElementCount());
3114 
3115   SDValue Op1 =
3116       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3117 
3118   SDLoc DL(Op);
3119   SDValue Mask, VL;
3120   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3121 
3122   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3123 
3124   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3125 }
3126 
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  // Splat the constants 1 (the AND mask) and 0 (the setcc comparand) across
  // the container type.
  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  // Scalable vectors can use the generic AND/SETCC nodes directly.
  if (VecVT.isScalableVector()) {
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  // Fixed-length vectors go through the VL-predicated node forms and are
  // converted back to the original fixed-length mask type at the end.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
3168 
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // A scalar move (vmv.s.x/vfmv.s.f) can only be used directly if the scalar
  // type is legal, i.e. unless this is an i64 element on RV32.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // The vector holding Val in its first element, ready to be slid up to Idx.
  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      // Inserting at element 0 needs no slide; the scalar move alone merges
      // Val into the first element of Vec.
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // VL = Idx+1 so only the destination element (at VL-1) is written; the tail
  // of Vec is left undisturbed.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
3267 
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and extract from that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  // Read element 0 via vmv.x.s (XLen-wide), then truncate to the requested
  // element type.
  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
3315 
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded. This legalizes the scalar ("splat") operand of such
// an intrinsic to XLenVT, or — when the scalar is wider than XLEN — rewrites
// it as a splat vector so the intrinsic can still be matched.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  // Chained intrinsics have the chain at operand 0, shifting the ID by one.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  // Look up whether this intrinsic has a scalar splat operand at all.
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  // Copy the operand list so the splat operand can be rewritten in place.
  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
3391 
3392 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3393                                                      SelectionDAG &DAG) const {
3394   unsigned IntNo = Op.getConstantOperandVal(0);
3395   SDLoc DL(Op);
3396   MVT XLenVT = Subtarget.getXLenVT();
3397 
3398   switch (IntNo) {
3399   default:
3400     break; // Don't custom lower most intrinsics.
3401   case Intrinsic::thread_pointer: {
3402     EVT PtrVT = getPointerTy(DAG.getDataLayout());
3403     return DAG.getRegister(RISCV::X4, PtrVT);
3404   }
3405   case Intrinsic::riscv_orc_b:
3406     // Lower to the GORCI encoding for orc.b.
3407     return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
3408                        DAG.getConstant(7, DL, XLenVT));
3409   case Intrinsic::riscv_grev:
3410   case Intrinsic::riscv_gorc: {
3411     unsigned Opc =
3412         IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
3413     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3414   }
3415   case Intrinsic::riscv_shfl:
3416   case Intrinsic::riscv_unshfl: {
3417     unsigned Opc =
3418         IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
3419     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3420   }
3421   case Intrinsic::riscv_bcompress:
3422   case Intrinsic::riscv_bdecompress: {
3423     unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
3424                                                        : RISCVISD::BDECOMPRESS;
3425     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3426   }
3427   case Intrinsic::riscv_vmv_x_s:
3428     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
3429     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
3430                        Op.getOperand(1));
3431   case Intrinsic::riscv_vmv_v_x:
3432     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
3433                             Op.getSimpleValueType(), DL, DAG, Subtarget);
3434   case Intrinsic::riscv_vfmv_v_f:
3435     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
3436                        Op.getOperand(1), Op.getOperand(2));
3437   case Intrinsic::riscv_vmv_s_x: {
3438     SDValue Scalar = Op.getOperand(2);
3439 
3440     if (Scalar.getValueType().bitsLE(XLenVT)) {
3441       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
3442       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
3443                          Op.getOperand(1), Scalar, Op.getOperand(3));
3444     }
3445 
3446     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
3447 
3448     // This is an i64 value that lives in two scalar registers. We have to
3449     // insert this in a convoluted way. First we build vXi64 splat containing
3450     // the/ two values that we assemble using some bit math. Next we'll use
3451     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
3452     // to merge element 0 from our splat into the source vector.
3453     // FIXME: This is probably not the best way to do this, but it is
3454     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
3455     // point.
3456     //   sw lo, (a0)
3457     //   sw hi, 4(a0)
3458     //   vlse vX, (a0)
3459     //
3460     //   vid.v      vVid
3461     //   vmseq.vx   mMask, vVid, 0
3462     //   vmerge.vvm vDest, vSrc, vVal, mMask
3463     MVT VT = Op.getSimpleValueType();
3464     SDValue Vec = Op.getOperand(1);
3465     SDValue VL = Op.getOperand(3);
3466 
3467     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
3468     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
3469                                       DAG.getConstant(0, DL, MVT::i32), VL);
3470 
3471     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3472     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3473     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3474     SDValue SelectCond =
3475         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
3476                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
3477     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
3478                        Vec, VL);
3479   }
3480   case Intrinsic::riscv_vslide1up:
3481   case Intrinsic::riscv_vslide1down:
3482   case Intrinsic::riscv_vslide1up_mask:
3483   case Intrinsic::riscv_vslide1down_mask: {
3484     // We need to special case these when the scalar is larger than XLen.
3485     unsigned NumOps = Op.getNumOperands();
3486     bool IsMasked = NumOps == 6;
3487     unsigned OpOffset = IsMasked ? 1 : 0;
3488     SDValue Scalar = Op.getOperand(2 + OpOffset);
3489     if (Scalar.getValueType().bitsLE(XLenVT))
3490       break;
3491 
3492     // Splatting a sign extended constant is fine.
3493     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
3494       if (isInt<32>(CVal->getSExtValue()))
3495         break;
3496 
3497     MVT VT = Op.getSimpleValueType();
3498     assert(VT.getVectorElementType() == MVT::i64 &&
3499            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3500 
3501     // Convert the vector source to the equivalent nxvXi32 vector.
3502     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3503     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3504 
3505     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3506                                    DAG.getConstant(0, DL, XLenVT));
3507     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3508                                    DAG.getConstant(1, DL, XLenVT));
3509 
3510     // Double the VL since we halved SEW.
3511     SDValue VL = Op.getOperand(NumOps - 1);
3512     SDValue I32VL =
3513         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3514 
3515     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3516     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3517 
3518     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3519     // instructions.
3520     if (IntNo == Intrinsic::riscv_vslide1up ||
3521         IntNo == Intrinsic::riscv_vslide1up_mask) {
3522       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3523                         I32Mask, I32VL);
3524       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
3525                         I32Mask, I32VL);
3526     } else {
3527       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
3528                         I32Mask, I32VL);
3529       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
3530                         I32Mask, I32VL);
3531     }
3532 
3533     // Convert back to nxvXi64.
3534     Vec = DAG.getBitcast(VT, Vec);
3535 
3536     if (!IsMasked)
3537       return Vec;
3538 
3539     // Apply mask after the operation.
3540     SDValue Mask = Op.getOperand(NumOps - 2);
3541     SDValue MaskedOff = Op.getOperand(1);
3542     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
3543   }
3544   }
3545 
3546   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3547 }
3548 
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Chained intrinsics need no special handling here beyond the common
  // scalar-operand legalization shared with the WO_CHAIN path (see
  // LowerINTRINSIC_WO_CHAIN, which falls through to the same helper).
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
3553 
3554 static MVT getLMUL1VT(MVT VT) {
3555   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3556          "Unexpected vector MVT");
3557   return MVT::getScalableVectorVT(
3558       VT.getVectorElementType(),
3559       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3560 }
3561 
3562 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
3563   switch (ISDOpcode) {
3564   default:
3565     llvm_unreachable("Unhandled reduction");
3566   case ISD::VECREDUCE_ADD:
3567     return RISCVISD::VECREDUCE_ADD_VL;
3568   case ISD::VECREDUCE_UMAX:
3569     return RISCVISD::VECREDUCE_UMAX_VL;
3570   case ISD::VECREDUCE_SMAX:
3571     return RISCVISD::VECREDUCE_SMAX_VL;
3572   case ISD::VECREDUCE_UMIN:
3573     return RISCVISD::VECREDUCE_UMIN_VL;
3574   case ISD::VECREDUCE_SMIN:
3575     return RISCVISD::VECREDUCE_SMIN_VL;
3576   case ISD::VECREDUCE_AND:
3577     return RISCVISD::VECREDUCE_AND_VL;
3578   case ISD::VECREDUCE_OR:
3579     return RISCVISD::VECREDUCE_OR_VL;
3580   case ISD::VECREDUCE_XOR:
3581     return RISCVISD::VECREDUCE_XOR_VL;
3582   }
3583 }
3584 
3585 SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
3586                                                       SelectionDAG &DAG) const {
3587   SDLoc DL(Op);
3588   SDValue Vec = Op.getOperand(0);
3589   MVT VecVT = Vec.getSimpleValueType();
3590   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
3591           Op.getOpcode() == ISD::VECREDUCE_OR ||
3592           Op.getOpcode() == ISD::VECREDUCE_XOR) &&
3593          "Unexpected reduction lowering");
3594 
3595   MVT XLenVT = Subtarget.getXLenVT();
3596   assert(Op.getValueType() == XLenVT &&
3597          "Expected reduction output to be legalized to XLenVT");
3598 
3599   MVT ContainerVT = VecVT;
3600   if (VecVT.isFixedLengthVector()) {
3601     ContainerVT = getContainerForFixedLengthVector(VecVT);
3602     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3603   }
3604 
3605   SDValue Mask, VL;
3606   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3607   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3608 
3609   switch (Op.getOpcode()) {
3610   default:
3611     llvm_unreachable("Unhandled reduction");
3612   case ISD::VECREDUCE_AND:
3613     // vpopc ~x == 0
3614     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
3615     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3616     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
3617   case ISD::VECREDUCE_OR:
3618     // vpopc x != 0
3619     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3620     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3621   case ISD::VECREDUCE_XOR: {
3622     // ((vpopc x) & 1) != 0
3623     SDValue One = DAG.getConstant(1, DL, XLenVT);
3624     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3625     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
3626     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3627   }
3628   }
3629 }
3630 
3631 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
3632                                             SelectionDAG &DAG) const {
3633   SDLoc DL(Op);
3634   SDValue Vec = Op.getOperand(0);
3635   EVT VecEVT = Vec.getValueType();
3636 
3637   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
3638 
3639   // Due to ordering in legalize types we may have a vector type that needs to
3640   // be split. Do that manually so we can get down to a legal type.
3641   while (getTypeAction(*DAG.getContext(), VecEVT) ==
3642          TargetLowering::TypeSplitVector) {
3643     SDValue Lo, Hi;
3644     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
3645     VecEVT = Lo.getValueType();
3646     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
3647   }
3648 
3649   // TODO: The type may need to be widened rather than split. Or widened before
3650   // it can be split.
3651   if (!isTypeLegal(VecEVT))
3652     return SDValue();
3653 
3654   MVT VecVT = VecEVT.getSimpleVT();
3655   MVT VecEltVT = VecVT.getVectorElementType();
3656   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
3657 
3658   MVT ContainerVT = VecVT;
3659   if (VecVT.isFixedLengthVector()) {
3660     ContainerVT = getContainerForFixedLengthVector(VecVT);
3661     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3662   }
3663 
3664   MVT M1VT = getLMUL1VT(ContainerVT);
3665 
3666   SDValue Mask, VL;
3667   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3668 
3669   // FIXME: This is a VLMAX splat which might be too large and can prevent
3670   // vsetvli removal.
3671   SDValue NeutralElem =
3672       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
3673   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
3674   SDValue Reduction =
3675       DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
3676   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3677                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3678   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
3679 }
3680 
3681 // Given a reduction op, this function returns the matching reduction opcode,
3682 // the vector SDValue and the scalar SDValue required to lower this to a
3683 // RISCVISD node.
3684 static std::tuple<unsigned, SDValue, SDValue>
3685 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
3686   SDLoc DL(Op);
3687   auto Flags = Op->getFlags();
3688   unsigned Opcode = Op.getOpcode();
3689   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
3690   switch (Opcode) {
3691   default:
3692     llvm_unreachable("Unhandled reduction");
3693   case ISD::VECREDUCE_FADD:
3694     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
3695                            DAG.getConstantFP(0.0, DL, EltVT));
3696   case ISD::VECREDUCE_SEQ_FADD:
3697     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
3698                            Op.getOperand(0));
3699   case ISD::VECREDUCE_FMIN:
3700     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
3701                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3702   case ISD::VECREDUCE_FMAX:
3703     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
3704                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3705   }
3706 }
3707 
3708 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
3709                                               SelectionDAG &DAG) const {
3710   SDLoc DL(Op);
3711   MVT VecEltVT = Op.getSimpleValueType();
3712 
3713   unsigned RVVOpcode;
3714   SDValue VectorVal, ScalarVal;
3715   std::tie(RVVOpcode, VectorVal, ScalarVal) =
3716       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
3717   MVT VecVT = VectorVal.getSimpleValueType();
3718 
3719   MVT ContainerVT = VecVT;
3720   if (VecVT.isFixedLengthVector()) {
3721     ContainerVT = getContainerForFixedLengthVector(VecVT);
3722     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
3723   }
3724 
3725   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
3726 
3727   SDValue Mask, VL;
3728   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3729 
3730   // FIXME: This is a VLMAX splat which might be too large and can prevent
3731   // vsetvli removal.
3732   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
3733   SDValue Reduction =
3734       DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
3735   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3736                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3737 }
3738 
// Lower INSERT_SUBVECTOR. Register-aligned inserts resolve to subregister
// operations; everything else is done with an explicit VSLIDEUP, going via an
// LMUL=1 intermediate type where possible to keep register pressure down.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as i8 vectors 1/8th the length; the index
      // scales down by the same factor.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back down to an i1 vector by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // Place the subvector at index 0 of an undef container, then slide it up
    // into position within Vec.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  // Decompose the index into a subregister index plus a remainder within that
  // subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // A fractional-LMUL subvector occupies only part of a register, so inserting
  // it must preserve the other elements of that register (unless undef).
  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  // Place the subvector at index 0 of an undef LMUL=1 vector before sliding.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
3888 
// Lower EXTRACT_SUBVECTOR. Register-aligned extracts resolve to subregister
// operations; everything else is done with an explicit VSLIDEDOWN followed by
// a cast-like subvector extract from element 0.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as i8 vectors 1/8th the length; the index
      // scales down by the same factor.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Truncate back down to an i1 vector by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  // Decompose the index into a subregister index plus a remainder within that
  // subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
4012 
4013 // Lower step_vector to the vid instruction. Any non-identity step value must
4014 // be accounted for my manual expansion.
4015 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
4016                                               SelectionDAG &DAG) const {
4017   SDLoc DL(Op);
4018   MVT VT = Op.getSimpleValueType();
4019   MVT XLenVT = Subtarget.getXLenVT();
4020   SDValue Mask, VL;
4021   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
4022   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
4023   uint64_t StepValImm = Op.getConstantOperandVal(0);
4024   if (StepValImm != 1) {
4025     assert(Op.getOperand(0).getValueType() == XLenVT &&
4026            "Unexpected step value type");
4027     if (isPowerOf2_64(StepValImm)) {
4028       SDValue StepVal =
4029           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
4030                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
4031       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
4032     } else {
4033       SDValue StepVal =
4034           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
4035       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
4036     }
4037   }
4038   return StepVec;
4039 }
4040 
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // MaxVLMAX stays 0 when the subtarget gives no upper bound on the vector
  // register size, meaning VLMAX is unknown at compile time.
  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  unsigned MinElts = VecVT.getVectorMinNumElements();
  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                              DAG.getConstant(MinElts, DL, XLenVT));
  SDValue VLMinus1 =
      DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);

  // Indices = (VLMAX-1) - vid, i.e. (VLMAX-1, VLMAX-2, ..., 1, 0).
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices =
      DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
4118 
4119 SDValue
4120 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
4121                                                      SelectionDAG &DAG) const {
4122   SDLoc DL(Op);
4123   auto *Load = cast<LoadSDNode>(Op);
4124 
4125   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4126                                         Load->getMemoryVT(),
4127                                         *Load->getMemOperand()) &&
4128          "Expecting a correctly-aligned load");
4129 
4130   MVT VT = Op.getSimpleValueType();
4131   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4132 
4133   SDValue VL =
4134       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4135 
4136   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4137   SDValue NewLoad = DAG.getMemIntrinsicNode(
4138       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
4139       Load->getMemoryVT(), Load->getMemOperand());
4140 
4141   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4142   return DAG.getMergeValues({Result, Load->getChain()}, DL);
4143 }
4144 
// Lower a fixed-length vector store by converting the value to the
// equivalent scalable container type and emitting a VSE_VL with VL equal to
// the fixed element count.
SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Store = cast<StoreSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()) &&
         "Expecting a correctly-aligned store");

  SDValue StoreVal = Store->getValue();
  MVT VT = StoreVal.getSimpleValueType();

  // If the size less than a byte, we need to pad with zeros to make a byte.
  // The sub-byte mask vector is inserted at element 0 of a zeroed v8i1 so the
  // padding bits are defined (zero).
  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
    VT = MVT::v8i1;
    StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           DAG.getConstant(0, DL, VT), StoreVal,
                           DAG.getIntPtrConstant(0, DL));
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // VL is the element count of the (possibly widened) fixed-length type.
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDValue NewValue =
      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
  return DAG.getMemIntrinsicNode(
      RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}
4179 
// Lower a masked load (MLOAD) to the riscv_vle_mask intrinsic. Fixed-length
// types are first converted to their scalable container types.
SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
  auto *Load = cast<MaskedLoadSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask = Load->getMask();
  SDValue PassThru = Load->getPassThru();
  SDValue VL;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // Legalize in the scalable container type; the mask gets the matching
    // i1 container type, and VL is the fixed element count.
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable types pass X0 as the VL operand.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Intrinsic operand order: chain, intrinsic id, passthru, pointer, mask, VL.
  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
  SDValue Ops[] = {Load->getChain(),   IntID, PassThru,
                   Load->getBasePtr(), Mask,  VL};
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());
  SDValue Chain = Result.getValue(1);

  // Convert the loaded value back to the original fixed-length type.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
4216 
// Lower a masked store (MSTORE) to the riscv_vse_mask intrinsic. Fixed-length
// types are first converted to their scalable container types.
SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
  auto *Store = cast<MaskedStoreSDNode>(Op);

  SDLoc DL(Op);
  SDValue Val = Store->getValue();
  SDValue Mask = Store->getMask();
  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // Legalize in the scalable container type; the mask gets the matching
    // i1 container type, and VL is the fixed element count.
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable types pass X0 as the VL operand.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Intrinsic operand order: chain, intrinsic id, value, pointer, mask, VL.
  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
  return DAG.getMemIntrinsicNode(
      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
      Store->getMemoryVT(), Store->getMemOperand());
}
4244 
4245 SDValue
4246 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
4247                                                       SelectionDAG &DAG) const {
4248   MVT InVT = Op.getOperand(0).getSimpleValueType();
4249   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
4250 
4251   MVT VT = Op.getSimpleValueType();
4252 
4253   SDValue Op1 =
4254       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4255   SDValue Op2 =
4256       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
4257 
4258   SDLoc DL(Op);
4259   SDValue VL =
4260       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4261 
4262   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4263   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4264 
4265   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
4266                             Op.getOperand(2), Mask, VL);
4267 
4268   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
4269 }
4270 
4271 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
4272     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
4273   MVT VT = Op.getSimpleValueType();
4274 
4275   if (VT.getVectorElementType() == MVT::i1)
4276     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
4277 
4278   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
4279 }
4280 
4281 // Lower vector ABS to smax(X, sub(0, X)).
4282 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
4283   SDLoc DL(Op);
4284   MVT VT = Op.getSimpleValueType();
4285   SDValue X = Op.getOperand(0);
4286 
4287   assert(VT.isFixedLengthVector() && "Unexpected type");
4288 
4289   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4290   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
4291 
4292   SDValue Mask, VL;
4293   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4294 
4295   SDValue SplatZero =
4296       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4297                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4298   SDValue NegX =
4299       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
4300   SDValue Max =
4301       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
4302 
4303   return convertFromScalableVector(VT, Max, DAG, Subtarget);
4304 }
4305 
4306 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
4307     SDValue Op, SelectionDAG &DAG) const {
4308   SDLoc DL(Op);
4309   MVT VT = Op.getSimpleValueType();
4310   SDValue Mag = Op.getOperand(0);
4311   SDValue Sign = Op.getOperand(1);
4312   assert(Mag.getValueType() == Sign.getValueType() &&
4313          "Can only handle COPYSIGN with matching types.");
4314 
4315   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4316   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
4317   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
4318 
4319   SDValue Mask, VL;
4320   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4321 
4322   SDValue CopySign =
4323       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
4324 
4325   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
4326 }
4327 
4328 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
4329     SDValue Op, SelectionDAG &DAG) const {
4330   MVT VT = Op.getSimpleValueType();
4331   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4332 
4333   MVT I1ContainerVT =
4334       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4335 
4336   SDValue CC =
4337       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
4338   SDValue Op1 =
4339       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
4340   SDValue Op2 =
4341       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
4342 
4343   SDLoc DL(Op);
4344   SDValue Mask, VL;
4345   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4346 
4347   SDValue Select =
4348       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
4349 
4350   return convertFromScalableVector(VT, Select, DAG, Subtarget);
4351 }
4352 
// Generic lowering of a fixed-length vector operation to its *_VL scalable
// counterpart: convert all vector operands to the container type, append the
// mask (if the node takes one) and VL operands, then convert the result back.
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc,
                                               bool HasMask) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  // The *_VL nodes expect the mask (when present) followed by VL as the
  // trailing operands.
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
4386 
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
//   types.
SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
                                       unsigned RISCVISDOpc) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SmallVector<SDValue, 4> Ops;

  for (const auto &OpIdx : enumerate(Op->ops())) {
    SDValue V = OpIdx.value();
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
    // Pass through operands which aren't fixed-length vectors.
    if (!V.getValueType().isFixedLengthVector()) {
      Ops.push_back(V);
      continue;
    }
    // "cast" fixed length vector to a scalable vector.
    // Each operand is converted using its own container type, which may
    // differ from the result's (e.g. the i1 mask operand).
    MVT OpVT = V.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
    assert(useRVVForFixedLengthVectorVT(OpVT) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  // Scalable results need no conversion on the way out.
  if (!VT.isFixedLengthVector())
    return DAG.getNode(RISCVISDOpc, DL, VT, Ops);

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);

  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
4423 
// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
// a RVV indexed load. The RVV indexed load instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
// truncated to XLEN and are treated as byte offsets. Any signed or scaled
// indexing is extended to the XLEN value type and scaled accordingly.
SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
  auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc DL(Op);

  SDValue Index = MGN->getIndex();
  SDValue Mask = MGN->getMask();
  SDValue PassThru = MGN->getPassThru();

  MVT VT = Op.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
         "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue VL;
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the result and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    if (VT.bitsGE(IndexVT)) {
      // Result type is wider (or equal): derive the index container from the
      // result's container element count.
      ContainerVT = getContainerForFixedLengthVector(VT);
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      // Index type is wider: derive the result container from the index's
      // container element count.
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    // Only the masked form needs the mask and passthru operands converted.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }

    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable types pass X0 as the VL operand.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Operand order: chain, id, [passthru,] base, index, [mask,] VL.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
  SmallVector<SDValue, 8> Ops{MGN->getChain(),
                              DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (!IsUnmasked)
    Ops.push_back(PassThru);
  Ops.push_back(MGN->getBasePtr());
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              MGN->getMemoryVT(), MGN->getMemOperand());
  SDValue Chain = Result.getValue(1);

  // Convert the result back to the original fixed-length type if needed.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
4504 
// Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
// a RVV indexed store. The RVV indexed store instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
// truncated to XLEN and are treated as byte offsets. Any signed or scaled
// indexing is extended to the XLEN value type and scaled accordingly.
SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
                                           SelectionDAG &DAG) const {
  auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
  SDLoc DL(Op);
  SDValue Index = MSN->getIndex();
  SDValue Mask = MSN->getMask();
  SDValue Val = MSN->getValue();

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
         "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue VL;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the value and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    MVT ContainerVT;
    if (VT.bitsGE(IndexVT)) {
      // Value type is wider (or equal): derive the index container from the
      // value's container element count.
      ContainerVT = getContainerForFixedLengthVector(VT);
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      // Index type is wider: derive the value container from the index's
      // container element count.
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    // Only the masked form needs the mask operand converted.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable types pass X0 as the VL operand.
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Operand order: chain, id, value, base, index, [mask,] VL.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{MSN->getChain(),
                              DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(MSN->getBasePtr());
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
                                 MSN->getMemoryVT(), MSN->getMemOperand());
}
4576 
// Lower GET_ROUNDING by reading the FRM CSR and translating the RISCV
// rounding-mode encoding to the FLT_ROUNDS encoding via a bit-packed table.
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue SysRegNo = DAG.getConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
  SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
  SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);

  // Encoding used for rounding mode in RISCV differs from that used in
  // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
  // table, which consists of a sequence of 4-bit fields, each representing
  // corresponding FLT_ROUNDS mode.
  static const int Table =
      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);

  // Shift amount is RM * 4 (each table field is 4 bits wide).
  SDValue Shift =
      DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  // Mask down to the selected field's low 3 bits.
  SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                               DAG.getConstant(7, DL, XLenVT));

  return DAG.getMergeValues({Masked, Chain}, DL);
}
4607 
// Lower SET_ROUNDING by translating the FLT_ROUNDS encoding to the RISCV
// encoding via a bit-packed table, then writing it to the FRM CSR.
SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue RMValue = Op->getOperand(1);
  SDValue SysRegNo = DAG.getConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);

  // Encoding used for rounding mode in RISCV differs from that used in
  // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
  // a table, which consists of a sequence of 4-bit fields, each representing
  // corresponding RISCV mode.
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));

  // Shift amount is RMValue * 4 (each table field is 4 bits wide).
  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  // Mask down to the selected field's low 3 bits before writing the CSR.
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));
  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}
4637 
4638 // Returns the opcode of the target-specific SDNode that implements the 32-bit
4639 // form of the given Opcode.
4640 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
4641   switch (Opcode) {
4642   default:
4643     llvm_unreachable("Unexpected opcode");
4644   case ISD::SHL:
4645     return RISCVISD::SLLW;
4646   case ISD::SRA:
4647     return RISCVISD::SRAW;
4648   case ISD::SRL:
4649     return RISCVISD::SRLW;
4650   case ISD::SDIV:
4651     return RISCVISD::DIVW;
4652   case ISD::UDIV:
4653     return RISCVISD::DIVUW;
4654   case ISD::UREM:
4655     return RISCVISD::REMUW;
4656   case ISD::ROTL:
4657     return RISCVISD::ROLW;
4658   case ISD::ROTR:
4659     return RISCVISD::RORW;
4660   case RISCVISD::GREV:
4661     return RISCVISD::GREVW;
4662   case RISCVISD::GORC:
4663     return RISCVISD::GORCW;
4664   }
4665 }
4666 
4667 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
4668 // Because i32 isn't a legal type for RV64, these operations would otherwise
4669 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
4670 // later one because the fact the operation was originally of type i32 is
4671 // lost.
4672 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
4673                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
4674   SDLoc DL(N);
4675   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
4676   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4677   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4678   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4679   // ReplaceNodeResults requires we maintain the same type for the return value.
4680   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4681 }
4682 
4683 // Converts the given 32-bit operation to a i64 operation with signed extension
4684 // semantic to reduce the signed extension instructions.
4685 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4686   SDLoc DL(N);
4687   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4688   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4689   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4690   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4691                                DAG.getValueType(MVT::i32));
4692   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4693 }
4694 
4695 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
4696                                              SmallVectorImpl<SDValue> &Results,
4697                                              SelectionDAG &DAG) const {
4698   SDLoc DL(N);
4699   switch (N->getOpcode()) {
4700   default:
4701     llvm_unreachable("Don't know how to custom type legalize this operation!");
4702   case ISD::STRICT_FP_TO_SINT:
4703   case ISD::STRICT_FP_TO_UINT:
4704   case ISD::FP_TO_SINT:
4705   case ISD::FP_TO_UINT: {
4706     bool IsStrict = N->isStrictFPOpcode();
4707     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4708            "Unexpected custom legalisation");
4709     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
4710     // If the FP type needs to be softened, emit a library call using the 'si'
4711     // version. If we left it to default legalization we'd end up with 'di'. If
4712     // the FP type doesn't need to be softened just let generic type
4713     // legalization promote the result type.
4714     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
4715         TargetLowering::TypeSoftenFloat)
4716       return;
4717     RTLIB::Libcall LC;
4718     if (N->getOpcode() == ISD::FP_TO_SINT ||
4719         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
4720       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
4721     else
4722       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
4723     MakeLibCallOptions CallOptions;
4724     EVT OpVT = Op0.getValueType();
4725     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
4726     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4727     SDValue Result;
4728     std::tie(Result, Chain) =
4729         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
4730     Results.push_back(Result);
4731     if (IsStrict)
4732       Results.push_back(Chain);
4733     break;
4734   }
4735   case ISD::READCYCLECOUNTER: {
4736     assert(!Subtarget.is64Bit() &&
4737            "READCYCLECOUNTER only has custom type legalization on riscv32");
4738 
4739     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4740     SDValue RCW =
4741         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
4742 
4743     Results.push_back(
4744         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
4745     Results.push_back(RCW.getValue(2));
4746     break;
4747   }
4748   case ISD::MUL: {
4749     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
4750     unsigned XLen = Subtarget.getXLen();
4751     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
4752     if (Size > XLen) {
4753       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
4754       SDValue LHS = N->getOperand(0);
4755       SDValue RHS = N->getOperand(1);
4756       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
4757 
4758       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
4759       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
4760       // We need exactly one side to be unsigned.
4761       if (LHSIsU == RHSIsU)
4762         return;
4763 
4764       auto MakeMULPair = [&](SDValue S, SDValue U) {
4765         MVT XLenVT = Subtarget.getXLenVT();
4766         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
4767         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
4768         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
4769         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
4770         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
4771       };
4772 
4773       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
4774       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
4775 
4776       // The other operand should be signed, but still prefer MULH when
4777       // possible.
4778       if (RHSIsU && LHSIsS && !RHSIsS)
4779         Results.push_back(MakeMULPair(LHS, RHS));
4780       else if (LHSIsU && RHSIsS && !LHSIsS)
4781         Results.push_back(MakeMULPair(RHS, LHS));
4782 
4783       return;
4784     }
4785     LLVM_FALLTHROUGH;
4786   }
4787   case ISD::ADD:
4788   case ISD::SUB:
4789     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4790            "Unexpected custom legalisation");
4791     if (N->getOperand(1).getOpcode() == ISD::Constant)
4792       return;
4793     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4794     break;
4795   case ISD::SHL:
4796   case ISD::SRA:
4797   case ISD::SRL:
4798     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4799            "Unexpected custom legalisation");
4800     if (N->getOperand(1).getOpcode() == ISD::Constant)
4801       return;
4802     Results.push_back(customLegalizeToWOp(N, DAG));
4803     break;
4804   case ISD::ROTL:
4805   case ISD::ROTR:
4806     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4807            "Unexpected custom legalisation");
4808     Results.push_back(customLegalizeToWOp(N, DAG));
4809     break;
4810   case ISD::CTTZ:
4811   case ISD::CTTZ_ZERO_UNDEF:
4812   case ISD::CTLZ:
4813   case ISD::CTLZ_ZERO_UNDEF: {
4814     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4815            "Unexpected custom legalisation");
4816 
4817     SDValue NewOp0 =
4818         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4819     bool IsCTZ =
4820         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
4821     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
4822     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
4823     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4824     return;
4825   }
4826   case ISD::SDIV:
4827   case ISD::UDIV:
4828   case ISD::UREM: {
4829     MVT VT = N->getSimpleValueType(0);
4830     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
4831            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
4832            "Unexpected custom legalisation");
4833     if (N->getOperand(0).getOpcode() == ISD::Constant ||
4834         N->getOperand(1).getOpcode() == ISD::Constant)
4835       return;
4836 
4837     // If the input is i32, use ANY_EXTEND since the W instructions don't read
4838     // the upper 32 bits. For other types we need to sign or zero extend
4839     // based on the opcode.
4840     unsigned ExtOpc = ISD::ANY_EXTEND;
4841     if (VT != MVT::i32)
4842       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
4843                                            : ISD::ZERO_EXTEND;
4844 
4845     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
4846     break;
4847   }
4848   case ISD::UADDO:
4849   case ISD::USUBO: {
4850     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4851            "Unexpected custom legalisation");
4852     bool IsAdd = N->getOpcode() == ISD::UADDO;
4853     // Create an ADDW or SUBW.
4854     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4855     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4856     SDValue Res =
4857         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
4858     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
4859                       DAG.getValueType(MVT::i32));
4860 
4861     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
4862     // Since the inputs are sign extended from i32, this is equivalent to
4863     // comparing the lower 32 bits.
4864     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
4865     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
4866                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
4867 
4868     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4869     Results.push_back(Overflow);
4870     return;
4871   }
4872   case ISD::UADDSAT:
4873   case ISD::USUBSAT: {
4874     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4875            "Unexpected custom legalisation");
4876     if (Subtarget.hasStdExtZbb()) {
4877       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
4878       // sign extend allows overflow of the lower 32 bits to be detected on
4879       // the promoted size.
4880       SDValue LHS =
4881           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
4882       SDValue RHS =
4883           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
4884       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
4885       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4886       return;
4887     }
4888 
4889     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
4890     // promotion for UADDO/USUBO.
4891     Results.push_back(expandAddSubSat(N, DAG));
4892     return;
4893   }
4894   case ISD::BITCAST: {
4895     EVT VT = N->getValueType(0);
4896     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
4897     SDValue Op0 = N->getOperand(0);
4898     EVT Op0VT = Op0.getValueType();
4899     MVT XLenVT = Subtarget.getXLenVT();
4900     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
4901       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
4902       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
4903     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
4904                Subtarget.hasStdExtF()) {
4905       SDValue FPConv =
4906           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
4907       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
4908     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
4909                isTypeLegal(Op0VT)) {
4910       // Custom-legalize bitcasts from fixed-length vector types to illegal
4911       // scalar types in order to improve codegen. Bitcast the vector to a
4912       // one-element vector type whose element type is the same as the result
4913       // type, and extract the first element.
4914       LLVMContext &Context = *DAG.getContext();
4915       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
4916       Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4917                                     DAG.getConstant(0, DL, XLenVT)));
4918     }
4919     break;
4920   }
4921   case RISCVISD::GREV:
4922   case RISCVISD::GORC: {
4923     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4924            "Unexpected custom legalisation");
4925     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
4926     // This is similar to customLegalizeToWOp, except that we pass the second
4927     // operand (a TargetConstant) straight through: it is already of type
4928     // XLenVT.
4929     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
4930     SDValue NewOp0 =
4931         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4932     SDValue NewOp1 =
4933         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4934     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4935     // ReplaceNodeResults requires we maintain the same type for the return
4936     // value.
4937     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4938     break;
4939   }
4940   case RISCVISD::SHFL: {
4941     // There is no SHFLIW instruction, but we can just promote the operation.
4942     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4943            "Unexpected custom legalisation");
4944     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
4945     SDValue NewOp0 =
4946         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4947     SDValue NewOp1 =
4948         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4949     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
4950     // ReplaceNodeResults requires we maintain the same type for the return
4951     // value.
4952     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4953     break;
4954   }
4955   case ISD::BSWAP:
4956   case ISD::BITREVERSE: {
4957     MVT VT = N->getSimpleValueType(0);
4958     MVT XLenVT = Subtarget.getXLenVT();
4959     assert((VT == MVT::i8 || VT == MVT::i16 ||
4960             (VT == MVT::i32 && Subtarget.is64Bit())) &&
4961            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
4962     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
4963     unsigned Imm = VT.getSizeInBits() - 1;
4964     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
4965     if (N->getOpcode() == ISD::BSWAP)
4966       Imm &= ~0x7U;
4967     unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
4968     SDValue GREVI =
4969         DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
4970     // ReplaceNodeResults requires we maintain the same type for the return
4971     // value.
4972     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
4973     break;
4974   }
4975   case ISD::FSHL:
4976   case ISD::FSHR: {
4977     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4978            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
4979     SDValue NewOp0 =
4980         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4981     SDValue NewOp1 =
4982         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4983     SDValue NewOp2 =
4984         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
4985     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
4986     // Mask the shift amount to 5 bits.
4987     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
4988                          DAG.getConstant(0x1f, DL, MVT::i64));
4989     unsigned Opc =
4990         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
4991     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
4992     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
4993     break;
4994   }
4995   case ISD::EXTRACT_VECTOR_ELT: {
4996     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
4997     // type is illegal (currently only vXi64 RV32).
4998     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
4999     // transferred to the destination register. We issue two of these from the
5000     // upper- and lower- halves of the SEW-bit vector element, slid down to the
5001     // first element.
5002     SDValue Vec = N->getOperand(0);
5003     SDValue Idx = N->getOperand(1);
5004 
5005     // The vector type hasn't been legalized yet so we can't issue target
5006     // specific nodes if it needs legalization.
5007     // FIXME: We would manually legalize if it's important.
5008     if (!isTypeLegal(Vec.getValueType()))
5009       return;
5010 
5011     MVT VecVT = Vec.getSimpleValueType();
5012 
5013     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
5014            VecVT.getVectorElementType() == MVT::i64 &&
5015            "Unexpected EXTRACT_VECTOR_ELT legalization");
5016 
5017     // If this is a fixed vector, we need to convert it to a scalable vector.
5018     MVT ContainerVT = VecVT;
5019     if (VecVT.isFixedLengthVector()) {
5020       ContainerVT = getContainerForFixedLengthVector(VecVT);
5021       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5022     }
5023 
5024     MVT XLenVT = Subtarget.getXLenVT();
5025 
5026     // Use a VL of 1 to avoid processing more elements than we need.
5027     MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
5028     SDValue VL = DAG.getConstant(1, DL, XLenVT);
5029     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5030 
5031     // Unless the index is known to be 0, we must slide the vector down to get
5032     // the desired element into index 0.
5033     if (!isNullConstant(Idx)) {
5034       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
5035                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
5036     }
5037 
5038     // Extract the lower XLEN bits of the correct vector element.
5039     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
5040 
5041     // To extract the upper XLEN bits of the vector element, shift the first
5042     // element right by 32 bits and re-extract the lower XLEN bits.
5043     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
5044                                      DAG.getConstant(32, DL, XLenVT), VL);
5045     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
5046                                  ThirtyTwoV, Mask, VL);
5047 
5048     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
5049 
5050     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
5051     break;
5052   }
5053   case ISD::INTRINSIC_WO_CHAIN: {
5054     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5055     switch (IntNo) {
5056     default:
5057       llvm_unreachable(
5058           "Don't know how to custom type legalize this intrinsic!");
5059     case Intrinsic::riscv_orc_b: {
5060       // Lower to the GORCI encoding for orc.b with the operand extended.
5061       SDValue NewOp =
5062           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5063       // If Zbp is enabled, use GORCIW which will sign extend the result.
5064       unsigned Opc =
5065           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
5066       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
5067                                 DAG.getConstant(7, DL, MVT::i64));
5068       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5069       return;
5070     }
5071     case Intrinsic::riscv_grev:
5072     case Intrinsic::riscv_gorc: {
5073       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5074              "Unexpected custom legalisation");
5075       SDValue NewOp1 =
5076           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5077       SDValue NewOp2 =
5078           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5079       unsigned Opc =
5080           IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
5081       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5082       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5083       break;
5084     }
5085     case Intrinsic::riscv_shfl:
5086     case Intrinsic::riscv_unshfl: {
5087       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5088              "Unexpected custom legalisation");
5089       SDValue NewOp1 =
5090           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5091       SDValue NewOp2 =
5092           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5093       unsigned Opc =
5094           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
5095       if (isa<ConstantSDNode>(N->getOperand(2))) {
5096         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
5097                              DAG.getConstant(0xf, DL, MVT::i64));
5098         Opc =
5099             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
5100       }
5101       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5102       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5103       break;
5104     }
5105     case Intrinsic::riscv_bcompress:
5106     case Intrinsic::riscv_bdecompress: {
5107       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5108              "Unexpected custom legalisation");
5109       SDValue NewOp1 =
5110           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5111       SDValue NewOp2 =
5112           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5113       unsigned Opc = IntNo == Intrinsic::riscv_bcompress
5114                          ? RISCVISD::BCOMPRESSW
5115                          : RISCVISD::BDECOMPRESSW;
5116       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5117       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5118       break;
5119     }
5120     case Intrinsic::riscv_vmv_x_s: {
5121       EVT VT = N->getValueType(0);
5122       MVT XLenVT = Subtarget.getXLenVT();
5123       if (VT.bitsLT(XLenVT)) {
5124         // Simple case just extract using vmv.x.s and truncate.
5125         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
5126                                       Subtarget.getXLenVT(), N->getOperand(1));
5127         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
5128         return;
5129       }
5130 
5131       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
5132              "Unexpected custom legalization");
5133 
5134       // We need to do the move in two steps.
5135       SDValue Vec = N->getOperand(1);
5136       MVT VecVT = Vec.getSimpleValueType();
5137 
5138       // First extract the lower XLEN bits of the element.
5139       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
5140 
5141       // To extract the upper XLEN bits of the vector element, shift the first
5142       // element right by 32 bits and re-extract the lower XLEN bits.
5143       SDValue VL = DAG.getConstant(1, DL, XLenVT);
5144       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
5145       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5146       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
5147                                        DAG.getConstant(32, DL, XLenVT), VL);
5148       SDValue LShr32 =
5149           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
5150       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
5151 
5152       Results.push_back(
5153           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
5154       break;
5155     }
5156     }
5157     break;
5158   }
5159   case ISD::VECREDUCE_ADD:
5160   case ISD::VECREDUCE_AND:
5161   case ISD::VECREDUCE_OR:
5162   case ISD::VECREDUCE_XOR:
5163   case ISD::VECREDUCE_SMAX:
5164   case ISD::VECREDUCE_UMAX:
5165   case ISD::VECREDUCE_SMIN:
5166   case ISD::VECREDUCE_UMIN:
5167     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
5168       Results.push_back(V);
5169     break;
5170   case ISD::FLT_ROUNDS_: {
5171     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
5172     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
5173     Results.push_back(Res.getValue(0));
5174     Results.push_back(Res.getValue(1));
5175     break;
5176   }
5177   }
5178 }
5179 
5180 // A structure to hold one of the bit-manipulation patterns below. Together, a
5181 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
5182 //   (or (and (shl x, 1), 0xAAAAAAAA),
5183 //       (and (srl x, 1), 0x55555555))
5184 struct RISCVBitmanipPat {
5185   SDValue Op;
5186   unsigned ShAmt;
5187   bool IsSHL;
5188 
5189   bool formsPairWith(const RISCVBitmanipPat &Other) const {
5190     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
5191   }
5192 };
5193 
// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
// BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
// XLen is 64.
// Returns None when Op does not have the expected shape; otherwise the matched
// source value, shift amount, and shift direction.
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
         "Unexpected number of masks");
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  // The core of the pattern must be a shift by a constant amount.
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  uint64_t ShAmt = Op.getConstantOperandVal(1);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  // Only power-of-2 shift amounts can index into BitmanipMasks below.
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  // Look up the expected mask for this shift amount and compare, truncated to
  // the operating width.
  unsigned MaskIdx = Log2_32(ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
5269 
5270 // Matches any of the following bit-manipulation patterns:
5271 //   (and (shl x, 1), (0x55555555 << 1))
5272 //   (and (srl x, 1), 0x55555555)
5273 //   (shl (and x, 0x55555555), 1)
5274 //   (srl (and x, (0x55555555 << 1)), 1)
5275 // where the shift amount and mask may vary thus:
5276 //   [1]  = 0x55555555 / 0xAAAAAAAA
5277 //   [2]  = 0x33333333 / 0xCCCCCCCC
5278 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
5279 //   [8]  = 0x00FF00FF / 0xFF00FF00
5280 //   [16] = 0x0000FFFF / 0xFFFFFFFF
5281 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
5282 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
5283   // These are the unshifted masks which we use to match bit-manipulation
5284   // patterns. They may be shifted left in certain circumstances.
5285   static const uint64_t BitmanipMasks[] = {
5286       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
5287       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
5288 
5289   return matchRISCVBitmanipPat(Op, BitmanipMasks);
5290 }
5291 
5292 // Match the following pattern as a GREVI(W) operation
5293 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
5294 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
5295                                const RISCVSubtarget &Subtarget) {
5296   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
5297   EVT VT = Op.getValueType();
5298 
5299   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
5300     auto LHS = matchGREVIPat(Op.getOperand(0));
5301     auto RHS = matchGREVIPat(Op.getOperand(1));
5302     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
5303       SDLoc DL(Op);
5304       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
5305                          DAG.getConstant(LHS->ShAmt, DL, VT));
5306     }
5307   }
5308   return SDValue();
5309 }
5310 
5311 // Matches any the following pattern as a GORCI(W) operation
5312 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
5313 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
5314 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
5315 // Note that with the variant of 3.,
5316 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
5317 // the inner pattern will first be matched as GREVI and then the outer
5318 // pattern will be matched to GORC via the first rule above.
5319 // 4.  (or (rotl/rotr x, bitwidth/2), x)
5320 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
5321                                const RISCVSubtarget &Subtarget) {
5322   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
5323   EVT VT = Op.getValueType();
5324 
5325   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
5326     SDLoc DL(Op);
5327     SDValue Op0 = Op.getOperand(0);
5328     SDValue Op1 = Op.getOperand(1);
5329 
5330     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
5331       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
5332           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
5333           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
5334         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
5335       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
5336       if ((Reverse.getOpcode() == ISD::ROTL ||
5337            Reverse.getOpcode() == ISD::ROTR) &&
5338           Reverse.getOperand(0) == X &&
5339           isa<ConstantSDNode>(Reverse.getOperand(1))) {
5340         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
5341         if (RotAmt == (VT.getSizeInBits() / 2))
5342           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
5343                              DAG.getConstant(RotAmt, DL, VT));
5344       }
5345       return SDValue();
5346     };
5347 
5348     // Check for either commutable permutation of (or (GREVI x, shamt), x)
5349     if (SDValue V = MatchOROfReverse(Op0, Op1))
5350       return V;
5351     if (SDValue V = MatchOROfReverse(Op1, Op0))
5352       return V;
5353 
5354     // OR is commutable so canonicalize its OR operand to the left
5355     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
5356       std::swap(Op0, Op1);
5357     if (Op0.getOpcode() != ISD::OR)
5358       return SDValue();
5359     SDValue OrOp0 = Op0.getOperand(0);
5360     SDValue OrOp1 = Op0.getOperand(1);
5361     auto LHS = matchGREVIPat(OrOp0);
5362     // OR is commutable so swap the operands and try again: x might have been
5363     // on the left
5364     if (!LHS) {
5365       std::swap(OrOp0, OrOp1);
5366       LHS = matchGREVIPat(OrOp0);
5367     }
5368     auto RHS = matchGREVIPat(Op1);
5369     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
5370       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
5371                          DAG.getConstant(LHS->ShAmt, DL, VT));
5372     }
5373   }
5374   return SDValue();
5375 }
5376 
5377 // Matches any of the following bit-manipulation patterns:
5378 //   (and (shl x, 1), (0x22222222 << 1))
5379 //   (and (srl x, 1), 0x22222222)
5380 //   (shl (and x, 0x22222222), 1)
5381 //   (srl (and x, (0x22222222 << 1)), 1)
5382 // where the shift amount and mask may vary thus:
5383 //   [1]  = 0x22222222 / 0x44444444
5384 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
5385 //   [4]  = 0x00F000F0 / 0x0F000F00
5386 //   [8]  = 0x0000FF00 / 0x00FF0000
5387 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
5388 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
5389   // These are the unshifted masks which we use to match bit-manipulation
5390   // patterns. They may be shifted left in certain circumstances.
5391   static const uint64_t BitmanipMasks[] = {
5392       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
5393       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
5394 
5395   return matchRISCVBitmanipPat(Op, BitmanipMasks);
5396 }
5397 
5398 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
5399 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
5400                                const RISCVSubtarget &Subtarget) {
5401   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
5402   EVT VT = Op.getValueType();
5403 
5404   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
5405     return SDValue();
5406 
5407   SDValue Op0 = Op.getOperand(0);
5408   SDValue Op1 = Op.getOperand(1);
5409 
5410   // Or is commutable so canonicalize the second OR to the LHS.
5411   if (Op0.getOpcode() != ISD::OR)
5412     std::swap(Op0, Op1);
5413   if (Op0.getOpcode() != ISD::OR)
5414     return SDValue();
5415 
5416   // We found an inner OR, so our operands are the operands of the inner OR
5417   // and the other operand of the outer OR.
5418   SDValue A = Op0.getOperand(0);
5419   SDValue B = Op0.getOperand(1);
5420   SDValue C = Op1;
5421 
5422   auto Match1 = matchSHFLPat(A);
5423   auto Match2 = matchSHFLPat(B);
5424 
5425   // If neither matched, we failed.
5426   if (!Match1 && !Match2)
5427     return SDValue();
5428 
5429   // We had at least one match. if one failed, try the remaining C operand.
5430   if (!Match1) {
5431     std::swap(A, C);
5432     Match1 = matchSHFLPat(A);
5433     if (!Match1)
5434       return SDValue();
5435   } else if (!Match2) {
5436     std::swap(B, C);
5437     Match2 = matchSHFLPat(B);
5438     if (!Match2)
5439       return SDValue();
5440   }
5441   assert(Match1 && Match2);
5442 
5443   // Make sure our matches pair up.
5444   if (!Match1->formsPairWith(*Match2))
5445     return SDValue();
5446 
5447   // All the remains is to make sure C is an AND with the same input, that masks
5448   // out the bits that are being shuffled.
5449   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
5450       C.getOperand(0) != Match1->Op)
5451     return SDValue();
5452 
5453   uint64_t Mask = C.getConstantOperandVal(1);
5454 
5455   static const uint64_t BitmanipMasks[] = {
5456       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
5457       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
5458   };
5459 
5460   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
5461   unsigned MaskIdx = Log2_32(Match1->ShAmt);
5462   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
5463 
5464   if (Mask != ExpMask)
5465     return SDValue();
5466 
5467   SDLoc DL(Op);
5468   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
5469                      DAG.getConstant(Match1->ShAmt, DL, VT));
5470 }
5471 
5472 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
5473 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
5474 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
5475 // not undo itself, but they are redundant.
5476 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
5477   SDValue Src = N->getOperand(0);
5478 
5479   if (Src.getOpcode() != N->getOpcode())
5480     return SDValue();
5481 
5482   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
5483       !isa<ConstantSDNode>(Src.getOperand(1)))
5484     return SDValue();
5485 
5486   unsigned ShAmt1 = N->getConstantOperandVal(1);
5487   unsigned ShAmt2 = Src.getConstantOperandVal(1);
5488   Src = Src.getOperand(0);
5489 
5490   unsigned CombinedShAmt;
5491   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
5492     CombinedShAmt = ShAmt1 | ShAmt2;
5493   else
5494     CombinedShAmt = ShAmt1 ^ ShAmt2;
5495 
5496   if (CombinedShAmt == 0)
5497     return Src;
5498 
5499   SDLoc DL(N);
5500   return DAG.getNode(
5501       N->getOpcode(), DL, N->getValueType(0), Src,
5502       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
5503 }
5504 
// Combine a constant select operand into its use:
//
// (and (select_cc lhs, rhs, cc, -1, c), x)
//   -> (select_cc lhs, rhs, cc, x, (and, x, c))  [AllOnes=1]
// (or  (select_cc lhs, rhs, cc, 0, c), x)
//   -> (select_cc lhs, rhs, cc, x, (or, x, c))  [AllOnes=0]
// (xor (select_cc lhs, rhs, cc, 0, c), x)
//   -> (select_cc lhs, rhs, cc, x, (xor, x, c))  [AllOnes=0]
//
// Slct must be a RISCVISD::SELECT_CC with operands
// (lhs, rhs, cc, truev, falsev); AllOnes selects whether the identity
// constant we look for is -1 (for AND) or 0 (for OR/XOR). Returns the folded
// SELECT_CC, or an empty SDValue if the pattern does not apply.
static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                     SelectionDAG &DAG, bool AllOnes) {
  EVT VT = N->getValueType(0);

  // Only fold a single-use select; otherwise we'd duplicate the select.
  if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
    return SDValue();

  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  // One select arm must be the identity constant for N's opcode; remember
  // which one, and keep the other (non-constant) arm to combine with OtherOp.
  bool SwapSelectOps;
  SDValue TrueVal = Slct.getOperand(3);
  SDValue FalseVal = Slct.getOperand(4);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says CC should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  // Rebuild the select with the same condition but the folded arms.
  return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                     {Slct.getOperand(0), Slct.getOperand(1),
                      Slct.getOperand(2), TrueVal, FalseVal});
}
5548 
5549 // Attempt combineSelectAndUse on each operand of a commutative operator N.
5550 static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
5551                                                 bool AllOnes) {
5552   SDValue N0 = N->getOperand(0);
5553   SDValue N1 = N->getOperand(1);
5554   if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
5555     return Result;
5556   if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
5557     return Result;
5558   return SDValue();
5559 }
5560 
5561 static SDValue performANDCombine(SDNode *N,
5562                                  TargetLowering::DAGCombinerInfo &DCI,
5563                                  const RISCVSubtarget &Subtarget) {
5564   SelectionDAG &DAG = DCI.DAG;
5565 
5566   // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
5567   //      (select lhs, rhs, cc, x, (and x, y))
5568   return combineSelectCCAndUseCommutative(N, DAG, true);
5569 }
5570 
5571 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
5572                                 const RISCVSubtarget &Subtarget) {
5573   SelectionDAG &DAG = DCI.DAG;
5574   if (Subtarget.hasStdExtZbp()) {
5575     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
5576       return GREV;
5577     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
5578       return GORC;
5579     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
5580       return SHFL;
5581   }
5582 
5583   // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
5584   //      (select lhs, rhs, cc, x, (or x, y))
5585   return combineSelectCCAndUseCommutative(N, DAG, false);
5586 }
5587 
5588 static SDValue performXORCombine(SDNode *N,
5589                                  TargetLowering::DAGCombinerInfo &DCI,
5590                                  const RISCVSubtarget &Subtarget) {
5591   SelectionDAG &DAG = DCI.DAG;
5592 
5593   // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
5594   //      (select lhs, rhs, cc, x, (xor x, y))
5595   return combineSelectCCAndUseCommutative(N, DAG, false);
5596 }
5597 
// Target-specific DAG combines. Each case either returns a replacement value,
// replaces multiple results via DCI.CombineTo, or breaks to fall through to
// the generic combiner. Several cases use SimplifyDemandedBits to narrow
// operands of W-suffixed (32-bit-on-RV64) nodes.
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    // Only the high half carries the sign bit of the double.
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // SimplifyDemandedBits may have replaced operands; revisit N unless it
      // was deleted in the process.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    // (BitWidth * 2) - 1 sets the low log2(BitWidth)+1 bits.
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GORC: {
    // Only the lower log2(Bitwidth) bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(1);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, BitWidth - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::GREVW:
  case RISCVISD::GORCW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
        SimplifyDemandedBits(RHS, RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // Only the lower log2(Bitwidth) bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(1);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    // SHFL/UNSHFL control words only use log2(BitWidth)-1 bits.
    APInt ShAmtMask(BitWidth, (BitWidth / 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW: {
    // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
    if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
        SimplifyDemandedBits(RHS, RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW: {
    // Only the lower 32 bits of LHS and RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt Mask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(LHS, Mask, DCI) ||
        SimplifyDemandedBits(RHS, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // The float's sign bit is bit 31; sign-extend the mask so the upper bits
    // of the i64 result are treated consistently.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return performXORCombine(N, DCI, Subtarget);
  case RISCVISD::SELECT_CC: {
    // Try to simplify the comparison feeding this select.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
    //      (select_cc X, Y, lt, trueV, falseV)
    // Sometimes the setcc is introduced after select_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC =
          DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
    //      (select_cc X, Y, eq/ne, trueV, falseV)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                         {LHS.getOperand(0), LHS.getOperand(1),
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4)});
    // (select_cc X, 1, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC =
          DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    break;
  }
  case RISCVISD::BR_CC: {
    // Mirrors the SELECT_CC combines above for conditional branches.
    SDValue LHS = N->getOperand(1);
    SDValue RHS = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
    //      (br_cc X, Y, lt, dest)
    // Sometimes the setcc is introduced after br_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
                         N->getOperand(4));
    }

    // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
    //      (br_cc X, Y, eq/ne, dest)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
                         N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
                         N->getOperand(3), N->getOperand(4));

    // (br_cc X, 1, setne, dest) ->
    // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getCondCode(CCVal);
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, TargetCC,
                         N->getOperand(4));
    }
    break;
  }
  case ISD::FCOPYSIGN: {
    EVT VT = N->getValueType(0);
    if (!VT.isVector())
      break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where FP_ROUND and
    // TRUNC=1.
    SDValue In2 = N->getOperand(1);
    // Avoid cases where the extend/round has multiple uses, as duplicating
    // those is typically more expensive than removing a fneg.
    if (!In2.hasOneUse())
      break;
    if (In2.getOpcode() != ISD::FP_EXTEND &&
        (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
      break;
    In2 = In2.getOperand(0);
    if (In2.getOpcode() != ISD::FNEG)
      break;
    SDLoc DL(N);
    SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                       DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
  }
  case ISD::MGATHER:
  case ISD::MSCATTER: {
    if (!DCI.isBeforeLegalize())
      break;
    MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
    SDValue Index = MGSN->getIndex();
    EVT IndexVT = Index.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    // RISCV indexed loads only support the "unsigned unscaled" addressing
    // mode, so anything else must be manually legalized.
    bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
                                (MGSN->isIndexSigned() &&
                                 IndexVT.getVectorElementType().bitsLT(XLenVT));
    if (!NeedsIdxLegalization)
      break;

    SDLoc DL(N);

    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
      IndexVT = IndexVT.changeVectorElementType(XLenVT);
      Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
                                                : ISD::ZERO_EXTEND,
                          DL, IndexVT, Index);
    }

    unsigned Scale = N->getConstantOperandVal(5);
    if (MGSN->isIndexScaled() && Scale != 1) {
      // Manually scale the indices by the element size.
      // TODO: Sanitize the scale operand here?
      assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
      SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
      Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
    }

    ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
    if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
      return DAG.getMaskedGather(
          N->getVTList(), MGSN->getMemoryVT(), DL,
          {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
           MGSN->getBasePtr(), Index, MGN->getScale()},
          MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
    }
    const auto *MSN = cast<MaskedScatterSDNode>(N);
    return DAG.getMaskedScatter(
        N->getVTList(), MGSN->getMemoryVT(), DL,
        {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
         Index, MGSN->getScale()},
        MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
  }
  case RISCVISD::SRA_VL:
  case RISCVISD::SRL_VL:
  case RISCVISD::SHL_VL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      SDLoc DL(N);
      SDValue VL = N->getOperand(3);
      EVT VT = N->getValueType(0);
      ShAmt =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
                         N->getOperand(2), N->getOperand(3));
    }
    break;
  }
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL: {
    SDValue ShAmt = N->getOperand(1);
    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
      SDLoc DL(N);
      EVT VT = N->getValueType(0);
      ShAmt =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
      return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
    }
    break;
  }
  }

  return SDValue();
}
6039 
6040 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
6041     const SDNode *N, CombineLevel Level) const {
6042   // The following folds are only desirable if `(OP _, c1 << c2)` can be
6043   // materialised in fewer instructions than `(OP _, c1)`:
6044   //
6045   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6046   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6047   SDValue N0 = N->getOperand(0);
6048   EVT Ty = N0.getValueType();
6049   if (Ty.isScalarInteger() &&
6050       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
6051     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
6052     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
6053     if (C1 && C2) {
6054       const APInt &C1Int = C1->getAPIntValue();
6055       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
6056 
6057       // We can materialise `c1 << c2` into an add immediate, so it's "free",
6058       // and the combine should happen, to potentially allow further combines
6059       // later.
6060       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
6061           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
6062         return true;
6063 
6064       // We can materialise `c1` in an add immediate, so it's "free", and the
6065       // combine should be prevented.
6066       if (C1Int.getMinSignedBits() <= 64 &&
6067           isLegalAddImmediate(C1Int.getSExtValue()))
6068         return false;
6069 
6070       // Neither constant will fit into an immediate, so find materialisation
6071       // costs.
6072       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
6073                                               Subtarget.is64Bit());
6074       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
6075           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
6076 
6077       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
6078       // combine should be prevented.
6079       if (C1Cost < ShiftedC1Cost)
6080         return false;
6081     }
6082   }
6083   return true;
6084 }
6085 
// Try to replace the constant operand of an AND with one that is cheaper to
// materialise on RISC-V, given which bits the users actually demand. Returns
// true if a replacement was committed via TLO.CombineTo; returning false lets
// the target-independent shrinking run instead.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.

  APInt ExpandedMask = Mask | ~DemandedBits;

  // A candidate mask is legal iff it lies between the fully-shrunk and the
  // fully-expanded masks (i.e. differs from Mask only in undemanded bits).
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  // Commit NewMask by rebuilding the AND; a no-op change still reports
  // success so callers stop searching.
  auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Preserve (and X, 0xffff) when zext.h is supported.
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
  if (VT == MVT::i64) {
    APInt NewMask = APInt(64, 0xffffffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Sanity check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
6167 
6168 static void computeGREV(APInt &Src, unsigned ShAmt) {
6169   ShAmt &= Src.getBitWidth() - 1;
6170   uint64_t x = Src.getZExtValue();
6171   if (ShAmt & 1)
6172     x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
6173   if (ShAmt & 2)
6174     x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
6175   if (ShAmt & 4)
6176     x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
6177   if (ShAmt & 8)
6178     x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
6179   if (ShAmt & 16)
6180     x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
6181   if (ShAmt & 32)
6182     x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
6183   Src = x;
6184 }
6185 
// Compute known-zero/known-one bits for RISC-V specific nodes so the generic
// combiner can fold through them.
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    // The result is one of operands 3 (true value) and 4 (false value).
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    // The result is at most the count implied by the maximum possible number
    // of trailing zeros in the low 32 bits, so higher bits are known zero.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = Log2_32(PossibleTZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    // Same reasoning as CTZW but for leading zeros.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = Log2_32(PossibleLZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GREVW: {
    // GREV with a constant control word permutes bits, so the known bits can
    // be permuted the same way (see computeGREV above).
    if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
      if (Opc == RISCVISD::GREVW)
        Known = Known.trunc(32);
      unsigned ShAmt = C->getZExtValue();
      computeGREV(Known.Zero, ShAmt);
      computeGREV(Known.One, ShAmt);
      if (Opc == RISCVISD::GREVW)
        Known = Known.sext(BitWidth);
    }
    break;
  }
  case RISCVISD::READ_VLENB:
    // We assume VLENB is at least 16 bytes.
    Known.Zero.setLowBits(4);
    break;
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)
        Known.Zero.setBitsFrom(31);
      break;
    }
    break;
  }
  }
}
6284 
// Report a lower bound on the number of sign bits for RISC-V specific nodes.
// Returning 1 means "nothing known beyond the sign bit itself".
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVW:
  case RISCVISD::GORCW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW:
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW:
    // These W-suffixed nodes produce a 32-bit result sign-extended to 64
    // bits, so bits 63..31 all equal the sign: at least 33 sign bits.
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        isa<ConstantSDNode>(Op.getOperand(1)) &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
      return 1;
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  return 1;
}
6339 
// Expand the ReadCycleWide pseudo into a retry loop that reads the 64-bit
// cycle counter via two 32-bit CSR accesses. Returns the block that control
// falls through to after the loop (DoneMBB).
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the retry loop; DoneMBB receives everything after MI.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // Emit the hi/lo/hi read sequence. CSRRS with rs1=x0 is a plain CSR read.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // If the high half changed between the two reads, retry the whole sequence.
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
6401 
// Expand SplitF64Pseudo: split an FPR64 value into two 32-bit GPR halves by
// spilling it to a stack slot and reloading each word with LW. Used where an
// f64 cannot be moved to integer registers directly (e.g. RV32 + D).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  // Operands 0/1 are the destination lo/hi GPRs; operand 2 is the f64 source.
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // Shared 8-byte slot reserved for f64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  // Two 4-byte loads covering offsets 0 and 4 of the spilled double.
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
6434 
// Expand BuildPairF64Pseudo: combine two 32-bit GPR halves into an FPR64 by
// storing both words to a stack slot and reloading it as a double. This is
// the inverse of emitSplitF64Pseudo and uses the same frame slot.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  // Operand 0 is the f64 destination; operands 1/2 are the lo/hi GPR sources.
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // Shared 8-byte slot reserved for f64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  // Two 4-byte stores covering offsets 0 and 4 of the slot.
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
6469 
6470 static bool isSelectPseudo(MachineInstr &MI) {
6471   switch (MI.getOpcode()) {
6472   default:
6473     return false;
6474   case RISCV::Select_GPR_Using_CC_GPR:
6475   case RISCV::Select_FPR16_Using_CC_GPR:
6476   case RISCV::Select_FPR32_Using_CC_GPR:
6477   case RISCV::Select_FPR64_Using_CC_GPR:
6478     return true;
6479   }
6480 }
6481 
// Expand one or more Select_*_Using_CC_GPR pseudos starting at MI into an
// explicit branch-and-PHI triangle. Returns the tail block that subsequent
// instructions were moved into.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI to find the end of the foldable select sequence,
  // bailing out at the first instruction that violates the rules above.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // A select can join the sequence only if it uses the same condition and
      // its TrueV/FalseV operands don't read an earlier select's result.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // We inserted PHIs, so the NoPHIs property no longer holds.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
6604 
6605 MachineBasicBlock *
6606 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6607                                                  MachineBasicBlock *BB) const {
6608   switch (MI.getOpcode()) {
6609   default:
6610     llvm_unreachable("Unexpected instr type to insert");
6611   case RISCV::ReadCycleWide:
6612     assert(!Subtarget.is64Bit() &&
6613            "ReadCycleWrite is only to be used on riscv32");
6614     return emitReadCycleWidePseudo(MI, BB);
6615   case RISCV::Select_GPR_Using_CC_GPR:
6616   case RISCV::Select_FPR16_Using_CC_GPR:
6617   case RISCV::Select_FPR32_Using_CC_GPR:
6618   case RISCV::Select_FPR64_Using_CC_GPR:
6619     return emitSelectPseudo(MI, BB);
6620   case RISCV::BuildPairF64Pseudo:
6621     return emitBuildPairF64Pseudo(MI, BB);
6622   case RISCV::SplitF64Pseudo:
6623     return emitSplitF64Pseudo(MI, BB);
6624   }
6625 }
6626 
6627 // Calling Convention Implementation.
6628 // The expectations for frontend ABI lowering vary from target to target.
6629 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
6630 // details, but this is a longer term goal. For now, we simply try to keep the
6631 // role of the frontend as simple and well-defined as possible. The rules can
6632 // be summarised as:
6633 // * Never split up large scalar arguments. We handle them here.
6634 // * If a hardfloat calling convention is being used, and the struct may be
6635 // passed in a pair of registers (fp+fp, int+fp), and both registers are
6636 // available, then pass as two separate arguments. If either the GPRs or FPRs
6637 // are exhausted, then pass according to the rule below.
6638 // * If a struct could never be passed in registers or directly in a stack
6639 // slot (as it is larger than 2*XLEN and the floating point rules don't
6640 // apply), then pass it using a pointer with the byval attribute.
6641 // * If a struct is less than 2*XLEN, then coerce to either a two-element
6642 // word-sized array or a 2*XLEN scalar (depending on alignment).
6643 // * The frontend can determine whether a struct is returned by reference or
6644 // not based on its size and fields. If it will be returned by reference, the
6645 // frontend must modify the prototype so a pointer with the sret annotation is
6646 // passed as the first argument. This is not necessary for large scalar
6647 // returns.
6648 // * Struct return values and varargs should be coerced to structs containing
6649 // register-size fields in the same situations they would be for fixed
6650 // arguments.
6651 
// Integer argument registers a0-a7 (x10-x17), in allocation order.
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// FP argument registers fa0-fa7 viewed as half-precision sub-registers.
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
// FP argument registers fa0-fa7 viewed as single-precision sub-registers.
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
// FP argument registers fa0-fa7 viewed as double-precision registers.
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
// Vector argument registers v8-v23, plus the LMUL=2/4/8 register groups
// covering the same range.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
6679 
6680 // Pass a 2*XLEN argument that has been split into two XLEN values through
6681 // registers or the stack as necessary.
6682 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
6683                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
6684                                 MVT ValVT2, MVT LocVT2,
6685                                 ISD::ArgFlagsTy ArgFlags2) {
6686   unsigned XLenInBytes = XLen / 8;
6687   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6688     // At least one half can be passed via register.
6689     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
6690                                      VA1.getLocVT(), CCValAssign::Full));
6691   } else {
6692     // Both halves must be passed on the stack, with proper alignment.
6693     Align StackAlign =
6694         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
6695     State.addLoc(
6696         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
6697                             State.AllocateStack(XLenInBytes, StackAlign),
6698                             VA1.getLocVT(), CCValAssign::Full));
6699     State.addLoc(CCValAssign::getMem(
6700         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
6701         LocVT2, CCValAssign::Full));
6702     return false;
6703   }
6704 
6705   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6706     // The second half can also be passed via register.
6707     State.addLoc(
6708         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
6709   } else {
6710     // The second half is passed via the stack, without additional alignment.
6711     State.addLoc(CCValAssign::getMem(
6712         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
6713         LocVT2, CCValAssign::Full));
6714   }
6715 
6716   return false;
6717 }
6718 
6719 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
6720                                Optional<unsigned> FirstMaskArgument,
6721                                CCState &State, const RISCVTargetLowering &TLI) {
6722   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
6723   if (RC == &RISCV::VRRegClass) {
6724     // Assign the first mask argument to V0.
6725     // This is an interim calling convention and it may be changed in the
6726     // future.
6727     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
6728       return State.AllocateReg(RISCV::V0);
6729     return State.AllocateReg(ArgVRs);
6730   }
6731   if (RC == &RISCV::VRM2RegClass)
6732     return State.AllocateReg(ArgVRM2s);
6733   if (RC == &RISCV::VRM4RegClass)
6734     return State.AllocateReg(ArgVRM4s);
6735   if (RC == &RISCV::VRM8RegClass)
6736     return State.AllocateReg(ArgVRM8s);
6737   llvm_unreachable("Unhandled register class for ValueType");
6738 }
6739 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// ValNo/ValVT/LocVT/LocInfo/ArgFlags describe the value being assigned and the
// resulting location is recorded in State. IsFixed is false for varargs,
// IsRet is true when assigning return values, OrigTy is the original IR type
// (may be null), and FirstMaskArgument identifies the vector mask argument
// that should be assigned to V0, if any.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // FP values routed to GPRs travel as bit patterns of XLen-sized integers.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve space for the second half (GPR if available, else stack).
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        report_fatal_error("Unable to pass scalable vector types on the stack");
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All pieces share one location: the register/slot holding the pointer.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
6954 
6955 template <typename ArgTy>
6956 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
6957   for (const auto &ArgIdx : enumerate(Args)) {
6958     MVT ArgVT = ArgIdx.value().VT;
6959     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
6960       return ArgIdx.index();
6961   }
6962   return None;
6963 }
6964 
6965 void RISCVTargetLowering::analyzeInputArgs(
6966     MachineFunction &MF, CCState &CCInfo,
6967     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
6968     RISCVCCAssignFn Fn) const {
6969   unsigned NumArgs = Ins.size();
6970   FunctionType *FType = MF.getFunction().getFunctionType();
6971 
6972   Optional<unsigned> FirstMaskArgument;
6973   if (Subtarget.hasStdExtV())
6974     FirstMaskArgument = preAssignMask(Ins);
6975 
6976   for (unsigned i = 0; i != NumArgs; ++i) {
6977     MVT ArgVT = Ins[i].VT;
6978     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
6979 
6980     Type *ArgTy = nullptr;
6981     if (IsRet)
6982       ArgTy = FType->getReturnType();
6983     else if (Ins[i].isOrigArg())
6984       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
6985 
6986     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
6987     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
6988            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
6989            FirstMaskArgument)) {
6990       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
6991                         << EVT(ArgVT).getEVTString() << '\n');
6992       llvm_unreachable(nullptr);
6993     }
6994   }
6995 }
6996 
6997 void RISCVTargetLowering::analyzeOutputArgs(
6998     MachineFunction &MF, CCState &CCInfo,
6999     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7000     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
7001   unsigned NumArgs = Outs.size();
7002 
7003   Optional<unsigned> FirstMaskArgument;
7004   if (Subtarget.hasStdExtV())
7005     FirstMaskArgument = preAssignMask(Outs);
7006 
7007   for (unsigned i = 0; i != NumArgs; i++) {
7008     MVT ArgVT = Outs[i].VT;
7009     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
7010     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7011 
7012     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
7013     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
7014            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
7015            FirstMaskArgument)) {
7016       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
7017                         << EVT(ArgVT).getEVTString() << "\n");
7018       llvm_unreachable(nullptr);
7019     }
7020   }
7021 }
7022 
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    // Fixed-length vectors were assigned in their scalable container type;
    // extract the original fixed-length value back out.
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    // When the location is wider than the FP value (f16 in an XLen GPR, or
    // f32 in an i64 GPR) a plain BITCAST can't be used; use the target's
    // FMV nodes instead.
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
7046 
7047 // The caller is responsible for loading the full value if the argument is
7048 // passed with CCValAssign::Indirect.
7049 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7050                                 const CCValAssign &VA, const SDLoc &DL,
7051                                 const RISCVTargetLowering &TLI) {
7052   MachineFunction &MF = DAG.getMachineFunction();
7053   MachineRegisterInfo &RegInfo = MF.getRegInfo();
7054   EVT LocVT = VA.getLocVT();
7055   SDValue Val;
7056   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7057   Register VReg = RegInfo.createVirtualRegister(RC);
7058   RegInfo.addLiveIn(VA.getLocReg(), VReg);
7059   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7060 
7061   if (VA.getLocInfo() == CCValAssign::Indirect)
7062     return Val;
7063 
7064   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
7065 }
7066 
7067 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7068                                    const CCValAssign &VA, const SDLoc &DL,
7069                                    const RISCVSubtarget &Subtarget) {
7070   EVT LocVT = VA.getLocVT();
7071 
7072   switch (VA.getLocInfo()) {
7073   default:
7074     llvm_unreachable("Unexpected CCValAssign::LocInfo");
7075   case CCValAssign::Full:
7076     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
7077       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
7078     break;
7079   case CCValAssign::BCvt:
7080     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
7081       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
7082     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7083       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
7084     else
7085       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7086     break;
7087   }
7088   return Val;
7089 }
7090 
7091 // The caller is responsible for loading the full value if the argument is
7092 // passed with CCValAssign::Indirect.
7093 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7094                                 const CCValAssign &VA, const SDLoc &DL) {
7095   MachineFunction &MF = DAG.getMachineFunction();
7096   MachineFrameInfo &MFI = MF.getFrameInfo();
7097   EVT LocVT = VA.getLocVT();
7098   EVT ValVT = VA.getValVT();
7099   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
7100   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7101                                  /*Immutable=*/true);
7102   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7103   SDValue Val;
7104 
7105   ISD::LoadExtType ExtType;
7106   switch (VA.getLocInfo()) {
7107   default:
7108     llvm_unreachable("Unexpected CCValAssign::LocInfo");
7109   case CCValAssign::Full:
7110   case CCValAssign::Indirect:
7111   case CCValAssign::BCvt:
7112     ExtType = ISD::NON_EXTLOAD;
7113     break;
7114   }
7115   Val = DAG.getExtLoad(
7116       ExtType, DL, LocVT, Chain, FIN,
7117       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7118   return Val;
7119 }
7120 
7121 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7122                                        const CCValAssign &VA, const SDLoc &DL) {
7123   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7124          "Unexpected VA");
7125   MachineFunction &MF = DAG.getMachineFunction();
7126   MachineFrameInfo &MFI = MF.getFrameInfo();
7127   MachineRegisterInfo &RegInfo = MF.getRegInfo();
7128 
7129   if (VA.isMemLoc()) {
7130     // f64 is passed on the stack.
7131     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
7132     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7133     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
7134                        MachinePointerInfo::getFixedStack(MF, FI));
7135   }
7136 
7137   assert(VA.isRegLoc() && "Expected register VA assignment");
7138 
7139   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
7140   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7141   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7142   SDValue Hi;
7143   if (VA.getLocReg() == RISCV::X17) {
7144     // Second half of f64 is passed on the stack.
7145     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
7146     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7147     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7148                      MachinePointerInfo::getFixedStack(MF, FI));
7149   } else {
7150     // Second half of f64 is passed in another GPR.
7151     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
7152     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
7153     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7154   }
7155   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7156 }
7157 
// Custom calling-convention assignment for fastcc: like the normal CC but
// with larger register lists so more values travel in registers.
//
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
//
// Returns true if the value could not be assigned (CC didn't match).
static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            Optional<unsigned> FirstMaskArgument) {

  // Argument registers a0-a7 (X10-X17) extended with temporaries t2 (X7) and
  // t3-t6 (X28-X31).
  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  // Integers: try a GPR first; if none are free, fall through to the stack
  // cases further down.
  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f16: fa0-fa7, then temporaries ft0-ft7 and ft8-ft11.
  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f32: same register ordering as f16, using the 32-bit subregisters.
  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f64: same register ordering, full-width registers.
  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Registers exhausted: 32-bit values go to a 4-byte-aligned stack slot.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  // ... and 64-bit values to an 8-byte-aligned slot.
  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  // Vectors: prefer an RVV register, then an indirect "fast" GPR, and only
  // for fixed-length vectors fall back to the stack.
  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        // Spill the fixed-length vector, aligned to its element size.
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
7261 
7262 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7263                          CCValAssign::LocInfo LocInfo,
7264                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
7265 
7266   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7267     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
7268     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
7269     static const MCPhysReg GPRList[] = {
7270         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
7271         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
7272     if (unsigned Reg = State.AllocateReg(GPRList)) {
7273       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7274       return false;
7275     }
7276   }
7277 
7278   if (LocVT == MVT::f32) {
7279     // Pass in STG registers: F1, ..., F6
7280     //                        fs0 ... fs5
7281     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
7282                                           RISCV::F18_F, RISCV::F19_F,
7283                                           RISCV::F20_F, RISCV::F21_F};
7284     if (unsigned Reg = State.AllocateReg(FPR32List)) {
7285       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7286       return false;
7287     }
7288   }
7289 
7290   if (LocVT == MVT::f64) {
7291     // Pass in STG registers: D1, ..., D6
7292     //                        fs6 ... fs11
7293     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
7294                                           RISCV::F24_D, RISCV::F25_D,
7295                                           RISCV::F26_D, RISCV::F27_D};
7296     if (unsigned Reg = State.AllocateReg(FPR64List)) {
7297       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7298       return false;
7299     }
7300   }
7301 
7302   report_fatal_error("No registers left in GHC calling convention");
7303   return true;
7304 }
7305 
// Lower the incoming (formal) arguments of a function: assign each a register
// or stack location, materialize it into the DAG, and — for varargs — save the
// unallocated argument GPRs into the vararg save area.
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Only the C, Fast and GHC calling conventions are supported. GHC requires
  // F and D since it pins values to floating-point registers.
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  // Interrupt handlers may not take arguments, and only the "user",
  // "supervisor" and "machine" kinds are recognized.
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                   : CC_RISCV);

  // Materialize each assigned argument. Note: the loop body may advance `i`
  // itself when an argument was split across several locations.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Load every subsequent part that belongs to the same original IR
      // argument, at its PartOffset relative to the base address. Scalable
      // parts have byte offsets that must be scaled by vscale.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR Value on the memory operand; this store has no
      // corresponding IR-level store to alias-analyze against.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
7461 
7462 /// isEligibleForTailCallOptimization - Check whether the call is eligible
7463 /// for tail call optimization.
7464 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
7465 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
7466     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7467     const SmallVector<CCValAssign, 16> &ArgLocs) const {
7468 
7469   auto &Callee = CLI.Callee;
7470   auto CalleeCC = CLI.CallConv;
7471   auto &Outs = CLI.Outs;
7472   auto &Caller = MF.getFunction();
7473   auto CallerCC = Caller.getCallingConv();
7474 
7475   // Exception-handling functions need a special set of instructions to
7476   // indicate a return to the hardware. Tail-calling another function would
7477   // probably break this.
7478   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
7479   // should be expanded as new function attributes are introduced.
7480   if (Caller.hasFnAttribute("interrupt"))
7481     return false;
7482 
7483   // Do not tail call opt if the stack is used to pass parameters.
7484   if (CCInfo.getNextStackOffset() != 0)
7485     return false;
7486 
7487   // Do not tail call opt if any parameters need to be passed indirectly.
7488   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
7489   // passed indirectly. So the address of the value will be passed in a
7490   // register, or if not available, then the address is put on the stack. In
7491   // order to pass indirectly, space on the stack often needs to be allocated
7492   // in order to store the value. In this case the CCInfo.getNextStackOffset()
7493   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
7494   // are passed CCValAssign::Indirect.
7495   for (auto &VA : ArgLocs)
7496     if (VA.getLocInfo() == CCValAssign::Indirect)
7497       return false;
7498 
7499   // Do not tail call opt if either caller or callee uses struct return
7500   // semantics.
7501   auto IsCallerStructRet = Caller.hasStructRetAttr();
7502   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7503   if (IsCallerStructRet || IsCalleeStructRet)
7504     return false;
7505 
7506   // Externally-defined functions with weak linkage should not be
7507   // tail-called. The behaviour of branch instructions in this situation (as
7508   // used for tail calls) is implementation-defined, so we cannot rely on the
7509   // linker replacing the tail call with a return.
7510   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
7511     const GlobalValue *GV = G->getGlobal();
7512     if (GV->hasExternalWeakLinkage())
7513       return false;
7514   }
7515 
7516   // The callee has to preserve all registers the caller needs to preserve.
7517   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
7518   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7519   if (CalleeCC != CallerCC) {
7520     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7521     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7522       return false;
7523   }
7524 
7525   // Byval parameters hand the function a pointer directly into the stack area
7526   // we want to reuse during a tail call. Working around this *is* possible
7527   // but less efficient and uglier in LowerCall.
7528   for (auto &Arg : Outs)
7529     if (Arg.Flags.isByVal())
7530       return false;
7531 
7532   return true;
7533 }
7534 
7535 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7536   return DAG.getDataLayout().getPrefTypeAlign(
7537       VT.getTypeForEVT(*DAG.getContext()));
7538 }
7539 
7540 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7541 // and output parameter nodes.
7542 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
7543                                        SmallVectorImpl<SDValue> &InVals) const {
7544   SelectionDAG &DAG = CLI.DAG;
7545   SDLoc &DL = CLI.DL;
7546   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7547   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7548   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7549   SDValue Chain = CLI.Chain;
7550   SDValue Callee = CLI.Callee;
7551   bool &IsTailCall = CLI.IsTailCall;
7552   CallingConv::ID CallConv = CLI.CallConv;
7553   bool IsVarArg = CLI.IsVarArg;
7554   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7555   MVT XLenVT = Subtarget.getXLenVT();
7556 
7557   MachineFunction &MF = DAG.getMachineFunction();
7558 
7559   // Analyze the operands of the call, assigning locations to each operand.
7560   SmallVector<CCValAssign, 16> ArgLocs;
7561   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7562 
7563   if (CallConv == CallingConv::GHC)
7564     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
7565   else
7566     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
7567                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
7568                                                     : CC_RISCV);
7569 
7570   // Check if it's really possible to do a tail call.
7571   if (IsTailCall)
7572     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7573 
7574   if (IsTailCall)
7575     ++NumTailCalls;
7576   else if (CLI.CB && CLI.CB->isMustTailCall())
7577     report_fatal_error("failed to perform tail call elimination on a call "
7578                        "site marked musttail");
7579 
7580   // Get a count of how many bytes are to be pushed on the stack.
7581   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
7582 
7583   // Create local copies for byval args
7584   SmallVector<SDValue, 8> ByValArgs;
7585   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7586     ISD::ArgFlagsTy Flags = Outs[i].Flags;
7587     if (!Flags.isByVal())
7588       continue;
7589 
7590     SDValue Arg = OutVals[i];
7591     unsigned Size = Flags.getByValSize();
7592     Align Alignment = Flags.getNonZeroByValAlign();
7593 
7594     int FI =
7595         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7596     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7597     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
7598 
7599     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7600                           /*IsVolatile=*/false,
7601                           /*AlwaysInline=*/false, IsTailCall,
7602                           MachinePointerInfo(), MachinePointerInfo());
7603     ByValArgs.push_back(FIPtr);
7604   }
7605 
7606   if (!IsTailCall)
7607     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7608 
7609   // Copy argument values to their designated locations.
7610   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
7611   SmallVector<SDValue, 8> MemOpChains;
7612   SDValue StackPtr;
7613   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
7614     CCValAssign &VA = ArgLocs[i];
7615     SDValue ArgValue = OutVals[i];
7616     ISD::ArgFlagsTy Flags = Outs[i].Flags;
7617 
7618     // Handle passing f64 on RV32D with a soft float ABI as a special case.
7619     bool IsF64OnRV32DSoftABI =
7620         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
7621     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
7622       SDValue SplitF64 = DAG.getNode(
7623           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7624       SDValue Lo = SplitF64.getValue(0);
7625       SDValue Hi = SplitF64.getValue(1);
7626 
7627       Register RegLo = VA.getLocReg();
7628       RegsToPass.push_back(std::make_pair(RegLo, Lo));
7629 
7630       if (RegLo == RISCV::X17) {
7631         // Second half of f64 is passed on the stack.
7632         // Work out the address of the stack slot.
7633         if (!StackPtr.getNode())
7634           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
7635         // Emit the store.
7636         MemOpChains.push_back(
7637             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
7638       } else {
7639         // Second half of f64 is passed in another GPR.
7640         assert(RegLo < RISCV::X31 && "Invalid register pair");
7641         Register RegHigh = RegLo + 1;
7642         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7643       }
7644       continue;
7645     }
7646 
7647     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
7648     // as any other MemLoc.
7649 
7650     // Promote the value if needed.
7651     // For now, only handle fully promoted and indirect arguments.
7652     if (VA.getLocInfo() == CCValAssign::Indirect) {
7653       // Store the argument in a stack slot and pass its address.
7654       Align StackAlign =
7655           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
7656                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
7657       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7658       // If the original argument was split (e.g. i128), we need
7659       // to store the required parts of it here (and pass just one address).
7660       // Vectors may be partly split to registers and partly to the stack, in
7661       // which case the base address is partly offset and subsequent stores are
7662       // relative to that.
7663       unsigned ArgIndex = Outs[i].OrigArgIndex;
7664       unsigned ArgPartOffset = Outs[i].PartOffset;
7665       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
7666       // Calculate the total size to store. We don't have access to what we're
7667       // actually storing other than performing the loop and collecting the
7668       // info.
7669       SmallVector<std::pair<SDValue, SDValue>> Parts;
7670       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
7671         SDValue PartValue = OutVals[i + 1];
7672         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
7673         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7674         EVT PartVT = PartValue.getValueType();
7675         if (PartVT.isScalableVector())
7676           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
7677         StoredSize += PartVT.getStoreSize();
7678         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7679         Parts.push_back(std::make_pair(PartValue, Offset));
7680         ++i;
7681       }
7682       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7683       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7684       MemOpChains.push_back(
7685           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7686                        MachinePointerInfo::getFixedStack(MF, FI)));
7687       for (const auto &Part : Parts) {
7688         SDValue PartValue = Part.first;
7689         SDValue PartOffset = Part.second;
7690         SDValue Address =
7691             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7692         MemOpChains.push_back(
7693             DAG.getStore(Chain, DL, PartValue, Address,
7694                          MachinePointerInfo::getFixedStack(MF, FI)));
7695       }
7696       ArgValue = SpillSlot;
7697     } else {
7698       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
7699     }
7700 
7701     // Use local copy if it is a byval arg.
7702     if (Flags.isByVal())
7703       ArgValue = ByValArgs[j++];
7704 
7705     if (VA.isRegLoc()) {
7706       // Queue up the argument copies and emit them at the end.
7707       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7708     } else {
7709       assert(VA.isMemLoc() && "Argument not register or memory");
7710       assert(!IsTailCall && "Tail call not allowed if stack is used "
7711                             "for passing parameters");
7712 
7713       // Work out the address of the stack slot.
7714       if (!StackPtr.getNode())
7715         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
7716       SDValue Address =
7717           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7718                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7719 
7720       // Emit the store.
7721       MemOpChains.push_back(
7722           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7723     }
7724   }
7725 
7726   // Join the stores, which are independent of one another.
7727   if (!MemOpChains.empty())
7728     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7729 
7730   SDValue Glue;
7731 
7732   // Build a sequence of copy-to-reg nodes, chained and glued together.
7733   for (auto &Reg : RegsToPass) {
7734     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
7735     Glue = Chain.getValue(1);
7736   }
7737 
7738   // Validate that none of the argument registers have been marked as
7739   // reserved, if so report an error. Do the same for the return address if this
7740   // is not a tailcall.
7741   validateCCReservedRegs(RegsToPass, MF);
7742   if (!IsTailCall &&
7743       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
7744     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
7745         MF.getFunction(),
7746         "Return address register required, but has been reserved."});
7747 
7748   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
7749   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
7750   // split it and then direct call can be matched by PseudoCALL.
7751   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
7752     const GlobalValue *GV = S->getGlobal();
7753 
7754     unsigned OpFlags = RISCVII::MO_CALL;
7755     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
7756       OpFlags = RISCVII::MO_PLT;
7757 
7758     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
7759   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7760     unsigned OpFlags = RISCVII::MO_CALL;
7761 
7762     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
7763                                                  nullptr))
7764       OpFlags = RISCVII::MO_PLT;
7765 
7766     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
7767   }
7768 
7769   // The first call operand is the chain and the second is the target address.
7770   SmallVector<SDValue, 8> Ops;
7771   Ops.push_back(Chain);
7772   Ops.push_back(Callee);
7773 
7774   // Add argument registers to the end of the list so that they are
7775   // known live into the call.
7776   for (auto &Reg : RegsToPass)
7777     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
7778 
7779   if (!IsTailCall) {
7780     // Add a register mask operand representing the call-preserved registers.
7781     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
7782     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
7783     assert(Mask && "Missing call preserved mask for calling convention");
7784     Ops.push_back(DAG.getRegisterMask(Mask));
7785   }
7786 
7787   // Glue the call to the argument copies, if any.
7788   if (Glue.getNode())
7789     Ops.push_back(Glue);
7790 
7791   // Emit the call.
7792   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7793 
7794   if (IsTailCall) {
7795     MF.getFrameInfo().setHasTailCall();
7796     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
7797   }
7798 
7799   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
7800   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
7801   Glue = Chain.getValue(1);
7802 
7803   // Mark the end of the call, which is glued to the call itself.
7804   Chain = DAG.getCALLSEQ_END(Chain,
7805                              DAG.getConstant(NumBytes, DL, PtrVT, true),
7806                              DAG.getConstant(0, DL, PtrVT, true),
7807                              Glue, DL);
7808   Glue = Chain.getValue(1);
7809 
7810   // Assign locations to each value returned by this call.
7811   SmallVector<CCValAssign, 16> RVLocs;
7812   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
7813   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
7814 
7815   // Copy all of the result registers out of their specified physreg.
7816   for (auto &VA : RVLocs) {
7817     // Copy the value out
7818     SDValue RetValue =
7819         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
7820     // Glue the RetValue to the end of the call sequence
7821     Chain = RetValue.getValue(1);
7822     Glue = RetValue.getValue(2);
7823 
7824     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7825       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
7826       SDValue RetValue2 =
7827           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
7828       Chain = RetValue2.getValue(1);
7829       Glue = RetValue2.getValue(2);
7830       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
7831                              RetValue2);
7832     }
7833 
7834     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
7835 
7836     InVals.push_back(RetValue);
7837   }
7838 
7839   return Chain;
7840 }
7841 
7842 bool RISCVTargetLowering::CanLowerReturn(
7843     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7844     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
7845   SmallVector<CCValAssign, 16> RVLocs;
7846   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7847 
7848   Optional<unsigned> FirstMaskArgument;
7849   if (Subtarget.hasStdExtV())
7850     FirstMaskArgument = preAssignMask(Outs);
7851 
7852   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7853     MVT VT = Outs[i].VT;
7854     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
7855     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
7856     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
7857                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
7858                  *this, FirstMaskArgument))
7859       return false;
7860   }
7861   return true;
7862 }
7863 
// Lower outgoing return values into RET_FLAG (or the interrupt-specific
// return nodes), copying each value into its assigned physical register and
// gluing the copies together so they stay adjacent to the return.
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      // The f64 is split into two i32 halves, returned in an even/odd GPR
      // pair (lo in RegLo, hi in RegLo + 1).
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      // NOTE(review): `RegLo + 1` assumes the adjacent register in the
      // TableGen-generated enum is the architecturally-next GPR. The
      // generated enum is not guaranteed to follow architectural numbering
      // for all registers, so this relies on the specific pair CC_RISCV
      // assigns here being adjacent — TODO confirm if assignments change.
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      // Diagnose use of a register the user reserved via -ffixed-<reg>.
      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Glue the two copies together so nothing is scheduled between them
      // and the return.
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select uret/sret/mret based on the interrupt kind; anything other
    // than "user"/"supervisor" falls through to machine-mode mret.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
7965 
7966 void RISCVTargetLowering::validateCCReservedRegs(
7967     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
7968     MachineFunction &MF) const {
7969   const Function &F = MF.getFunction();
7970   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
7971 
7972   if (llvm::any_of(Regs, [&STI](auto Reg) {
7973         return STI.isRegisterReservedByUser(Reg.first);
7974       }))
7975     F.getContext().diagnose(DiagnosticInfoUnsupported{
7976         F, "Argument register required, but has been reserved."});
7977 }
7978 
// A call marked `tail` in the IR is a candidate for tail-call emission;
// further legality checking happens during call lowering.
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
7982 
// Return a human-readable name for a RISCVISD target node, used when
// printing/dumping SelectionDAGs. Returns nullptr for opcodes that are not
// RISCV-specific.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expands to a case returning the stringified node name, keeping each entry
// on one line so the list stays easy to scan and extend.
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREV)
  NODE_NAME_CASE(GREVW)
  NODE_NAME_CASE(GORC)
  NODE_NAME_CASE(GORCW)
  NODE_NAME_CASE(SHFL)
  NODE_NAME_CASE(SHFLW)
  NODE_NAME_CASE(UNSHFL)
  NODE_NAME_CASE(UNSHFLW)
  NODE_NAME_CASE(BCOMPRESS)
  NODE_NAME_CASE(BCOMPRESSW)
  NODE_NAME_CASE(BDECOMPRESS)
  NODE_NAME_CASE(BDECOMPRESSW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(FMINNUM_VL)
  NODE_NAME_CASE(FMAXNUM_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VPOPC_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
8119 
8120 /// getConstraintType - Given a constraint letter, return the type of
8121 /// constraint it is for this target.
8122 RISCVTargetLowering::ConstraintType
8123 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
8124   if (Constraint.size() == 1) {
8125     switch (Constraint[0]) {
8126     default:
8127       break;
8128     case 'f':
8129     case 'v':
8130       return C_RegisterClass;
8131     case 'I':
8132     case 'J':
8133     case 'K':
8134       return C_Immediate;
8135     case 'A':
8136       return C_Memory;
8137     }
8138   }
8139   return TargetLowering::getConstraintType(Constraint);
8140 }
8141 
// Resolve an inline-asm register constraint (single-letter class constraint
// or explicit "{reg}" name, including ABI aliases) to a concrete register /
// register class pair.
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // Pick the narrowest FP register class that is legal for VT and
      // supported by the enabled extensions (Zfh/F/D).
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    case 'v':
      // Try mask, single, then grouped (LMUL 2/4/8) vector register classes
      // until one is legal for VT.
      for (const auto *RC :
           {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
          return std::make_pair(0U, RC);
      }
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // With D, prefer the 64-bit view of the same architectural register.
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  if (Subtarget.hasStdExtV()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      // Grouped types: translate the single register to the super-register
      // of the matching LMUL register class.
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
8319 
8320 unsigned
8321 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
8322   // Currently only support length 1 constraints.
8323   if (ConstraintCode.size() == 1) {
8324     switch (ConstraintCode[0]) {
8325     case 'A':
8326       return InlineAsm::Constraint_A;
8327     default:
8328       break;
8329     }
8330   }
8331 
8332   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
8333 }
8334 
8335 void RISCVTargetLowering::LowerAsmOperandForConstraint(
8336     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8337     SelectionDAG &DAG) const {
8338   // Currently only support length 1 constraints.
8339   if (Constraint.length() == 1) {
8340     switch (Constraint[0]) {
8341     case 'I':
8342       // Validate & create a 12-bit signed immediate operand.
8343       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8344         uint64_t CVal = C->getSExtValue();
8345         if (isInt<12>(CVal))
8346           Ops.push_back(
8347               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
8348       }
8349       return;
8350     case 'J':
8351       // Validate & create an integer zero operand.
8352       if (auto *C = dyn_cast<ConstantSDNode>(Op))
8353         if (C->getZExtValue() == 0)
8354           Ops.push_back(
8355               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
8356       return;
8357     case 'K':
8358       // Validate & create a 5-bit unsigned immediate operand.
8359       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8360         uint64_t CVal = C->getZExtValue();
8361         if (isUInt<5>(CVal))
8362           Ops.push_back(
8363               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
8364       }
8365       return;
8366     default:
8367       break;
8368     }
8369   }
8370   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8371 }
8372 
8373 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
8374                                                    Instruction *Inst,
8375                                                    AtomicOrdering Ord) const {
8376   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
8377     return Builder.CreateFence(Ord);
8378   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
8379     return Builder.CreateFence(AtomicOrdering::Release);
8380   return nullptr;
8381 }
8382 
8383 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
8384                                                     Instruction *Inst,
8385                                                     AtomicOrdering Ord) const {
8386   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
8387     return Builder.CreateFence(AtomicOrdering::Acquire);
8388   return nullptr;
8389 }
8390 
8391 TargetLowering::AtomicExpansionKind
8392 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8393   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
8394   // point operations can't be used in an lr/sc sequence without breaking the
8395   // forward-progress guarantee.
8396   if (AI->isFloatingPointOperation())
8397     return AtomicExpansionKind::CmpXChg;
8398 
8399   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8400   if (Size == 8 || Size == 16)
8401     return AtomicExpansionKind::MaskedIntrinsic;
8402   return AtomicExpansionKind::None;
8403 }
8404 
8405 static Intrinsic::ID
8406 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
8407   if (XLen == 32) {
8408     switch (BinOp) {
8409     default:
8410       llvm_unreachable("Unexpected AtomicRMW BinOp");
8411     case AtomicRMWInst::Xchg:
8412       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
8413     case AtomicRMWInst::Add:
8414       return Intrinsic::riscv_masked_atomicrmw_add_i32;
8415     case AtomicRMWInst::Sub:
8416       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
8417     case AtomicRMWInst::Nand:
8418       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
8419     case AtomicRMWInst::Max:
8420       return Intrinsic::riscv_masked_atomicrmw_max_i32;
8421     case AtomicRMWInst::Min:
8422       return Intrinsic::riscv_masked_atomicrmw_min_i32;
8423     case AtomicRMWInst::UMax:
8424       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
8425     case AtomicRMWInst::UMin:
8426       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
8427     }
8428   }
8429 
8430   if (XLen == 64) {
8431     switch (BinOp) {
8432     default:
8433       llvm_unreachable("Unexpected AtomicRMW BinOp");
8434     case AtomicRMWInst::Xchg:
8435       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
8436     case AtomicRMWInst::Add:
8437       return Intrinsic::riscv_masked_atomicrmw_add_i64;
8438     case AtomicRMWInst::Sub:
8439       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
8440     case AtomicRMWInst::Nand:
8441       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
8442     case AtomicRMWInst::Max:
8443       return Intrinsic::riscv_masked_atomicrmw_max_i64;
8444     case AtomicRMWInst::Min:
8445       return Intrinsic::riscv_masked_atomicrmw_min_i64;
8446     case AtomicRMWInst::UMax:
8447       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
8448     case AtomicRMWInst::UMin:
8449       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
8450     }
8451   }
8452 
8453   llvm_unreachable("Unexpected XLen\n");
8454 }
8455 
// Emit a call to the masked atomicrmw intrinsic implementing a sub-word
// atomic operation on the word-aligned address AlignedAddr, with Incr
// positioned under Mask by ShiftAmt. Returns the (still shifted) old value.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as a plain XLen-wide integer.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The RV64 intrinsics take i64 operands; widen the i32 inputs.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // Narrow the i64 intrinsic result back to the i32 the expansion expects.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
8498 
8499 TargetLowering::AtomicExpansionKind
8500 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
8501     AtomicCmpXchgInst *CI) const {
8502   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8503   if (Size == 8 || Size == 16)
8504     return AtomicExpansionKind::MaskedIntrinsic;
8505   return AtomicExpansionKind::None;
8506 }
8507 
// Emit a call to the target-specific masked cmpxchg intrinsic implementing a
// sub-word (i8/i16) compare-and-exchange as a masked LR/SC loop on the
// containing aligned word. CmpVal/NewVal are already shifted into position
// and Mask selects the sub-word lanes. Returns the intrinsic's result,
// truncated back to i32 on RV64.
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // Pass the ordering as an XLen-wide immediate operand to the intrinsic.
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  // The i64 variant takes XLen-wide operands; sign-extend the i32 values.
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  // Callers expect an i32 result; truncate the i64 intrinsic result on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
8529 
// Keep any explicit extend on gather/scatter index vectors; this target does
// not fold the extension away for any index type.
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}
8533 
8534 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
8535                                                      EVT VT) const {
8536   VT = VT.getScalarType();
8537 
8538   if (!VT.isSimple())
8539     return false;
8540 
8541   switch (VT.getSimpleVT().SimpleTy) {
8542   case MVT::f16:
8543     return Subtarget.hasStdExtZfh();
8544   case MVT::f32:
8545     return Subtarget.hasStdExtF();
8546   case MVT::f64:
8547     return Subtarget.hasStdExtD();
8548   default:
8549     break;
8550   }
8551 
8552   return false;
8553 }
8554 
// The exception pointer is carried in X10 (a0) on landing pads.
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
8559 
// The exception type-selector is carried in X11 (a1) on landing pads.
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
8564 
8565 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8566   // Return false to suppress the unnecessary extensions if the LibCall
8567   // arguments or return value is f32 type for LP64 ABI.
8568   RISCVABI::ABI ABI = Subtarget.getTargetABI();
8569   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
8570     return false;
8571 
8572   return true;
8573 }
8574 
8575 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
8576   if (Subtarget.is64Bit() && Type == MVT::i32)
8577     return true;
8578 
8579   return IsSigned;
8580 }
8581 
8582 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
8583                                                  SDValue C) const {
8584   // Check integral scalar types.
8585   if (VT.isScalarInteger()) {
8586     // Omit the optimization if the sub target has the M extension and the data
8587     // size exceeds XLen.
8588     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
8589       return false;
8590     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8591       // Break the MUL to a SLLI and an ADD/SUB.
8592       const APInt &Imm = ConstNode->getAPIntValue();
8593       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8594           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8595         return true;
8596       // Omit the following optimization if the sub target has the M extension
8597       // and the data size >= XLen.
8598       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
8599         return false;
8600       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
8601       // a pair of LUI/ADDI.
8602       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
8603         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
8604         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
8605             (1 - ImmS).isPowerOf2())
8606         return true;
8607       }
8608     }
8609   }
8610 
8611   return false;
8612 }
8613 
8614 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
8615     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8616     bool *Fast) const {
8617   if (!VT.isVector())
8618     return false;
8619 
8620   EVT ElemVT = VT.getVectorElementType();
8621   if (Alignment >= ElemVT.getStoreSize()) {
8622     if (Fast)
8623       *Fast = true;
8624     return true;
8625   }
8626 
8627   return false;
8628 }
8629 
// Split Val into NumParts parts of type PartVT for argument/return passing.
// Handles two RISC-V-specific cases: passing an f16 in an f32 register
// (NaN-boxed), and placing a scalable vector value into a larger scalable
// register type. Returns true if the split was handled, false to fall back
// to the generic splitting logic.
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this is an ABI register copy.
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
    // and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    // Only handled when the value fits a whole number of times into the part.
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Place the value into the low elements of the (possibly larger) part.
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
8671 
// Inverse of splitValueIntoRegisterParts: reassemble a value of type ValueVT
// from NumParts parts of type PartVT. Handles recovering an f16 that was
// NaN-boxed in an f32 register, and extracting a scalable vector value from
// a larger scalable register type. Returns an empty SDValue to fall back to
// the generic joining logic.
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this is an ABI register copy.
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    // Only handled when the value fits a whole number of times into the part.
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      // Extract the low elements that hold the value, then bitcast back to
      // the requested element type if it differed.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}
8712 
8713 #define GET_REGISTER_MATCHER
8714 #include "RISCVGenAsmMatcher.inc"
8715 
// Resolve a named register (e.g. from llvm.read_register) to a physical
// register. Tries the ABI/alternate name first, then the architectural name.
// Fatal errors on unknown names, and on registers that are neither reserved
// by the target nor reserved by the user, since reading/writing an
// allocatable register by name is unsupported.
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
8731 
namespace llvm {
namespace RISCVVIntrinsicsTable {

// Pull in the TableGen-generated implementation of the RISC-V vector
// intrinsics searchable table.
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm
8741