1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58                 "doesn't support the F instruction set extension (ignoring "
59                           "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         MVT ContainerVT = getContainerForFixedLengthVector(VT);
147         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
148         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
149         addRegisterClass(VT, TRI.getRegClass(RCID));
150       };
151       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
152         if (useRVVForFixedLengthVectorVT(VT))
153           addRegClassForFixedVectors(VT);
154 
155       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
156         if (useRVVForFixedLengthVectorVT(VT))
157           addRegClassForFixedVectors(VT);
158     }
159   }
160 
161   // Compute derived properties from the register classes.
162   computeRegisterProperties(STI.getRegisterInfo());
163 
164   setStackPointerRegisterToSaveRestore(RISCV::X2);
165 
166   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
167     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
168 
169   // TODO: add all necessary setOperationAction calls.
170   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
171 
172   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
173   setOperationAction(ISD::BR_CC, XLenVT, Expand);
174   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
176 
177   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
178   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
179 
180   setOperationAction(ISD::VASTART, MVT::Other, Custom);
181   setOperationAction(ISD::VAARG, MVT::Other, Expand);
182   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
183   setOperationAction(ISD::VAEND, MVT::Other, Expand);
184 
185   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
186   if (!Subtarget.hasStdExtZbb()) {
187     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
188     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
189   }
190 
191   if (Subtarget.is64Bit()) {
192     setOperationAction(ISD::ADD, MVT::i32, Custom);
193     setOperationAction(ISD::SUB, MVT::i32, Custom);
194     setOperationAction(ISD::SHL, MVT::i32, Custom);
195     setOperationAction(ISD::SRA, MVT::i32, Custom);
196     setOperationAction(ISD::SRL, MVT::i32, Custom);
197 
198     setOperationAction(ISD::UADDO, MVT::i32, Custom);
199     setOperationAction(ISD::USUBO, MVT::i32, Custom);
200     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
201     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
202   }
203 
204   if (!Subtarget.hasStdExtM()) {
205     setOperationAction(ISD::MUL, XLenVT, Expand);
206     setOperationAction(ISD::MULHS, XLenVT, Expand);
207     setOperationAction(ISD::MULHU, XLenVT, Expand);
208     setOperationAction(ISD::SDIV, XLenVT, Expand);
209     setOperationAction(ISD::UDIV, XLenVT, Expand);
210     setOperationAction(ISD::SREM, XLenVT, Expand);
211     setOperationAction(ISD::UREM, XLenVT, Expand);
212   } else {
213     if (Subtarget.is64Bit()) {
214       setOperationAction(ISD::MUL, MVT::i32, Custom);
215       setOperationAction(ISD::MUL, MVT::i128, Custom);
216 
217       setOperationAction(ISD::SDIV, MVT::i8, Custom);
218       setOperationAction(ISD::UDIV, MVT::i8, Custom);
219       setOperationAction(ISD::UREM, MVT::i8, Custom);
220       setOperationAction(ISD::SDIV, MVT::i16, Custom);
221       setOperationAction(ISD::UDIV, MVT::i16, Custom);
222       setOperationAction(ISD::UREM, MVT::i16, Custom);
223       setOperationAction(ISD::SDIV, MVT::i32, Custom);
224       setOperationAction(ISD::UDIV, MVT::i32, Custom);
225       setOperationAction(ISD::UREM, MVT::i32, Custom);
226     } else {
227       setOperationAction(ISD::MUL, MVT::i64, Custom);
228     }
229   }
230 
231   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
232   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
234   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
235 
236   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
237   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
239 
240   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
241     if (Subtarget.is64Bit()) {
242       setOperationAction(ISD::ROTL, MVT::i32, Custom);
243       setOperationAction(ISD::ROTR, MVT::i32, Custom);
244     }
245   } else {
246     setOperationAction(ISD::ROTL, XLenVT, Expand);
247     setOperationAction(ISD::ROTR, XLenVT, Expand);
248   }
249 
250   if (Subtarget.hasStdExtZbp()) {
251     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
252     // more combining.
253     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
254     setOperationAction(ISD::BSWAP, XLenVT, Custom);
255 
256     if (Subtarget.is64Bit()) {
257       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
258       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
259     }
260   } else {
261     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
262     // pattern match it directly in isel.
263     setOperationAction(ISD::BSWAP, XLenVT,
264                        Subtarget.hasStdExtZbb() ? Legal : Expand);
265   }
266 
267   if (Subtarget.hasStdExtZbb()) {
268     setOperationAction(ISD::SMIN, XLenVT, Legal);
269     setOperationAction(ISD::SMAX, XLenVT, Legal);
270     setOperationAction(ISD::UMIN, XLenVT, Legal);
271     setOperationAction(ISD::UMAX, XLenVT, Legal);
272 
273     if (Subtarget.is64Bit()) {
274       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
275       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
276       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
277       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
278     }
279   } else {
280     setOperationAction(ISD::CTTZ, XLenVT, Expand);
281     setOperationAction(ISD::CTLZ, XLenVT, Expand);
282     setOperationAction(ISD::CTPOP, XLenVT, Expand);
283   }
284 
285   if (Subtarget.hasStdExtZbt()) {
286     setOperationAction(ISD::FSHL, XLenVT, Custom);
287     setOperationAction(ISD::FSHR, XLenVT, Custom);
288     setOperationAction(ISD::SELECT, XLenVT, Legal);
289 
290     if (Subtarget.is64Bit()) {
291       setOperationAction(ISD::FSHL, MVT::i32, Custom);
292       setOperationAction(ISD::FSHR, MVT::i32, Custom);
293     }
294   } else {
295     setOperationAction(ISD::SELECT, XLenVT, Custom);
296   }
297 
298   ISD::CondCode FPCCToExpand[] = {
299       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
300       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
301       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
302 
303   ISD::NodeType FPOpToExpand[] = {
304       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
305       ISD::FP_TO_FP16};
306 
307   if (Subtarget.hasStdExtZfh())
308     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
309 
310   if (Subtarget.hasStdExtZfh()) {
311     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
312     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
313     for (auto CC : FPCCToExpand)
314       setCondCodeAction(CC, MVT::f16, Expand);
315     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
316     setOperationAction(ISD::SELECT, MVT::f16, Custom);
317     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
318     for (auto Op : FPOpToExpand)
319       setOperationAction(Op, MVT::f16, Expand);
320   }
321 
322   if (Subtarget.hasStdExtF()) {
323     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
324     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
325     for (auto CC : FPCCToExpand)
326       setCondCodeAction(CC, MVT::f32, Expand);
327     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
328     setOperationAction(ISD::SELECT, MVT::f32, Custom);
329     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
330     for (auto Op : FPOpToExpand)
331       setOperationAction(Op, MVT::f32, Expand);
332     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
333     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
334   }
335 
336   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
337     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
338 
339   if (Subtarget.hasStdExtD()) {
340     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
341     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
342     for (auto CC : FPCCToExpand)
343       setCondCodeAction(CC, MVT::f64, Expand);
344     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
345     setOperationAction(ISD::SELECT, MVT::f64, Custom);
346     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
347     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
348     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
349     for (auto Op : FPOpToExpand)
350       setOperationAction(Op, MVT::f64, Expand);
351     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
352     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
353   }
354 
355   if (Subtarget.is64Bit()) {
356     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
357     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
358     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
359     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
360   }
361 
362   if (Subtarget.hasStdExtF()) {
363     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
364     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
365   }
366 
367   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
368   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
369   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
370   setOperationAction(ISD::JumpTable, XLenVT, Custom);
371 
372   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
373 
374   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
375   // Unfortunately this can't be determined just from the ISA naming string.
376   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
377                      Subtarget.is64Bit() ? Legal : Custom);
378 
379   setOperationAction(ISD::TRAP, MVT::Other, Legal);
380   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
381   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
382   if (Subtarget.is64Bit())
383     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
384 
385   if (Subtarget.hasStdExtA()) {
386     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
387     setMinCmpXchgSizeInBits(32);
388   } else {
389     setMaxAtomicSizeInBitsSupported(0);
390   }
391 
392   setBooleanContents(ZeroOrOneBooleanContent);
393 
394   if (Subtarget.hasStdExtV()) {
395     setBooleanVectorContents(ZeroOrOneBooleanContent);
396 
397     setOperationAction(ISD::VSCALE, XLenVT, Custom);
398 
399     // RVV intrinsics may have illegal operands.
400     // We also need to custom legalize vmv.x.s.
401     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
402     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
403     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
404     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
405     if (Subtarget.is64Bit()) {
406       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
407     } else {
408       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
409       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
410     }
411 
412     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
413 
414     static unsigned IntegerVPOps[] = {
415         ISD::VP_ADD,  ISD::VP_SUB,  ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
416         ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,   ISD::VP_XOR,
417         ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
418 
419     if (!Subtarget.is64Bit()) {
420       // We must custom-lower certain vXi64 operations on RV32 due to the vector
421       // element type being illegal.
422       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
423       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
424 
425       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
426       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
427       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
428       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
429       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
430       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
431       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
432       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
433     }
434 
435     for (MVT VT : BoolVecVTs) {
436       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
437 
438       // Mask VTs are custom-expanded into a series of standard nodes
439       setOperationAction(ISD::TRUNCATE, VT, Custom);
440       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
441       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
442 
443       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
444       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
445 
446       setOperationAction(ISD::SELECT, VT, Expand);
447       setOperationAction(ISD::SELECT_CC, VT, Expand);
448       setOperationAction(ISD::VSELECT, VT, Expand);
449 
450       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
451       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
452       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
453 
454       // RVV has native int->float & float->int conversions where the
455       // element type sizes are within one power-of-two of each other. Any
456       // wider distances between type sizes have to be lowered as sequences
457       // which progressively narrow the gap in stages.
458       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
459       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
460       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
461       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
462 
463       // Expand all extending loads to types larger than this, and truncating
464       // stores from types larger than this.
465       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
466         setTruncStoreAction(OtherVT, VT, Expand);
467         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
468         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
469         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
470       }
471     }
472 
473     for (MVT VT : IntVecVTs) {
474       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
475       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
476 
477       setOperationAction(ISD::SMIN, VT, Legal);
478       setOperationAction(ISD::SMAX, VT, Legal);
479       setOperationAction(ISD::UMIN, VT, Legal);
480       setOperationAction(ISD::UMAX, VT, Legal);
481 
482       setOperationAction(ISD::ROTL, VT, Expand);
483       setOperationAction(ISD::ROTR, VT, Expand);
484 
485       // Custom-lower extensions and truncations from/to mask types.
486       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
487       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
488       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
489 
490       // RVV has native int->float & float->int conversions where the
491       // element type sizes are within one power-of-two of each other. Any
492       // wider distances between type sizes have to be lowered as sequences
493       // which progressively narrow the gap in stages.
494       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
495       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
496       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
497       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
498 
499       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
500       // nodes which truncate by one power of two at a time.
501       setOperationAction(ISD::TRUNCATE, VT, Custom);
502 
503       // Custom-lower insert/extract operations to simplify patterns.
504       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
505       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
506 
507       // Custom-lower reduction operations to set up the corresponding custom
508       // nodes' operands.
509       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
510       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
511       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
512       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
513       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
514       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
515       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
516       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
517 
518       for (unsigned VPOpc : IntegerVPOps)
519         setOperationAction(VPOpc, VT, Custom);
520 
521       setOperationAction(ISD::MLOAD, VT, Custom);
522       setOperationAction(ISD::MSTORE, VT, Custom);
523       setOperationAction(ISD::MGATHER, VT, Custom);
524       setOperationAction(ISD::MSCATTER, VT, Custom);
525 
526       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
527       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
528       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
529 
530       setOperationAction(ISD::SELECT, VT, Expand);
531       setOperationAction(ISD::SELECT_CC, VT, Expand);
532 
533       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
534       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
535 
536       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
537         setTruncStoreAction(VT, OtherVT, Expand);
538         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
539         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
540         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
541       }
542     }
543 
544     // Expand various CCs to best match the RVV ISA, which natively supports UNE
545     // but no other unordered comparisons, and supports all ordered comparisons
546     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
547     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
548     // and we pattern-match those back to the "original", swapping operands once
549     // more. This way we catch both operations and both "vf" and "fv" forms with
550     // fewer patterns.
551     ISD::CondCode VFPCCToExpand[] = {
552         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
553         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
554         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
555     };
556 
557     // Sets common operation actions on RVV floating-point vector types.
558     const auto SetCommonVFPActions = [&](MVT VT) {
559       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
560       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
561       // sizes are within one power-of-two of each other. Therefore conversions
562       // between vXf16 and vXf64 must be lowered as sequences which convert via
563       // vXf32.
564       setOperationAction(ISD::FP_ROUND, VT, Custom);
565       setOperationAction(ISD::FP_EXTEND, VT, Custom);
566       // Custom-lower insert/extract operations to simplify patterns.
567       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
568       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
569       // Expand various condition codes (explained above).
570       for (auto CC : VFPCCToExpand)
571         setCondCodeAction(CC, VT, Expand);
572 
573       setOperationAction(ISD::FMINNUM, VT, Legal);
574       setOperationAction(ISD::FMAXNUM, VT, Legal);
575 
576       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
577       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
578       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
579       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
580       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
581 
582       setOperationAction(ISD::MLOAD, VT, Custom);
583       setOperationAction(ISD::MSTORE, VT, Custom);
584       setOperationAction(ISD::MGATHER, VT, Custom);
585       setOperationAction(ISD::MSCATTER, VT, Custom);
586 
587       setOperationAction(ISD::SELECT, VT, Expand);
588       setOperationAction(ISD::SELECT_CC, VT, Expand);
589 
590       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
591       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
592       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
593 
594       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
595     };
596 
597     // Sets common extload/truncstore actions on RVV floating-point vector
598     // types.
599     const auto SetCommonVFPExtLoadTruncStoreActions =
600         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
601           for (auto SmallVT : SmallerVTs) {
602             setTruncStoreAction(VT, SmallVT, Expand);
603             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
604           }
605         };
606 
607     if (Subtarget.hasStdExtZfh())
608       for (MVT VT : F16VecVTs)
609         SetCommonVFPActions(VT);
610 
611     for (MVT VT : F32VecVTs) {
612       if (Subtarget.hasStdExtF())
613         SetCommonVFPActions(VT);
614       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
615     }
616 
617     for (MVT VT : F64VecVTs) {
618       if (Subtarget.hasStdExtD())
619         SetCommonVFPActions(VT);
620       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
621       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
622     }
623 
624     if (Subtarget.useRVVForFixedLengthVectors()) {
625       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
626         if (!useRVVForFixedLengthVectorVT(VT))
627           continue;
628 
629         // By default everything must be expanded.
630         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
631           setOperationAction(Op, VT, Expand);
632         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
633           setTruncStoreAction(VT, OtherVT, Expand);
634           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
635           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
636           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
637         }
638 
639         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
640         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
641         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
642 
643         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
644         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
645 
646         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
647         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
648 
649         setOperationAction(ISD::LOAD, VT, Custom);
650         setOperationAction(ISD::STORE, VT, Custom);
651 
652         setOperationAction(ISD::SETCC, VT, Custom);
653 
654         setOperationAction(ISD::TRUNCATE, VT, Custom);
655 
656         setOperationAction(ISD::BITCAST, VT, Custom);
657 
658         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
659         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
660         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
661 
662         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
663         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
664         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
665         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
666 
667         // Operations below are different for between masks and other vectors.
668         if (VT.getVectorElementType() == MVT::i1) {
669           setOperationAction(ISD::AND, VT, Custom);
670           setOperationAction(ISD::OR, VT, Custom);
671           setOperationAction(ISD::XOR, VT, Custom);
672           continue;
673         }
674 
675         // Use SPLAT_VECTOR to prevent type legalization from destroying the
676         // splats when type legalizing i64 scalar on RV32.
677         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
678         // improvements first.
679         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
680           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
681           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
682         }
683 
684         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
685         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
686 
687         setOperationAction(ISD::MLOAD, VT, Custom);
688         setOperationAction(ISD::MSTORE, VT, Custom);
689         setOperationAction(ISD::MGATHER, VT, Custom);
690         setOperationAction(ISD::MSCATTER, VT, Custom);
691         setOperationAction(ISD::ADD, VT, Custom);
692         setOperationAction(ISD::MUL, VT, Custom);
693         setOperationAction(ISD::SUB, VT, Custom);
694         setOperationAction(ISD::AND, VT, Custom);
695         setOperationAction(ISD::OR, VT, Custom);
696         setOperationAction(ISD::XOR, VT, Custom);
697         setOperationAction(ISD::SDIV, VT, Custom);
698         setOperationAction(ISD::SREM, VT, Custom);
699         setOperationAction(ISD::UDIV, VT, Custom);
700         setOperationAction(ISD::UREM, VT, Custom);
701         setOperationAction(ISD::SHL, VT, Custom);
702         setOperationAction(ISD::SRA, VT, Custom);
703         setOperationAction(ISD::SRL, VT, Custom);
704 
705         setOperationAction(ISD::SMIN, VT, Custom);
706         setOperationAction(ISD::SMAX, VT, Custom);
707         setOperationAction(ISD::UMIN, VT, Custom);
708         setOperationAction(ISD::UMAX, VT, Custom);
709         setOperationAction(ISD::ABS,  VT, Custom);
710 
711         setOperationAction(ISD::MULHS, VT, Custom);
712         setOperationAction(ISD::MULHU, VT, Custom);
713 
714         setOperationAction(ISD::VSELECT, VT, Custom);
715         setOperationAction(ISD::SELECT, VT, Expand);
716         setOperationAction(ISD::SELECT_CC, VT, Expand);
717 
718         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
719         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
720         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
721 
722         // Custom-lower reduction operations to set up the corresponding custom
723         // nodes' operands.
724         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
725         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
726         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
727         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
728         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
729 
730         for (unsigned VPOpc : IntegerVPOps)
731           setOperationAction(VPOpc, VT, Custom);
732       }
733 
734       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
735         if (!useRVVForFixedLengthVectorVT(VT))
736           continue;
737 
738         // By default everything must be expanded.
739         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
740           setOperationAction(Op, VT, Expand);
741         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
742           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
743           setTruncStoreAction(VT, OtherVT, Expand);
744         }
745 
746         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
747         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
748         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
749 
750         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
751         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
752         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
753         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
754 
755         setOperationAction(ISD::LOAD, VT, Custom);
756         setOperationAction(ISD::STORE, VT, Custom);
757         setOperationAction(ISD::MLOAD, VT, Custom);
758         setOperationAction(ISD::MSTORE, VT, Custom);
759         setOperationAction(ISD::MGATHER, VT, Custom);
760         setOperationAction(ISD::MSCATTER, VT, Custom);
761         setOperationAction(ISD::FADD, VT, Custom);
762         setOperationAction(ISD::FSUB, VT, Custom);
763         setOperationAction(ISD::FMUL, VT, Custom);
764         setOperationAction(ISD::FDIV, VT, Custom);
765         setOperationAction(ISD::FNEG, VT, Custom);
766         setOperationAction(ISD::FABS, VT, Custom);
767         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
768         setOperationAction(ISD::FSQRT, VT, Custom);
769         setOperationAction(ISD::FMA, VT, Custom);
770         setOperationAction(ISD::FMINNUM, VT, Custom);
771         setOperationAction(ISD::FMAXNUM, VT, Custom);
772 
773         setOperationAction(ISD::FP_ROUND, VT, Custom);
774         setOperationAction(ISD::FP_EXTEND, VT, Custom);
775 
776         for (auto CC : VFPCCToExpand)
777           setCondCodeAction(CC, VT, Expand);
778 
779         setOperationAction(ISD::VSELECT, VT, Custom);
780         setOperationAction(ISD::SELECT, VT, Expand);
781         setOperationAction(ISD::SELECT_CC, VT, Expand);
782 
783         setOperationAction(ISD::BITCAST, VT, Custom);
784 
785         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
786         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
787         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
788         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
789       }
790 
791       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
792       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
793       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
794       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
795       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
796       setOperationAction(ISD::BITCAST, MVT::f16, Custom);
797       setOperationAction(ISD::BITCAST, MVT::f32, Custom);
798       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
799     }
800   }
801 
802   // Function alignments.
803   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
804   setMinFunctionAlignment(FunctionAlignment);
805   setPrefFunctionAlignment(FunctionAlignment);
806 
807   setMinimumJumpTableEntries(5);
808 
809   // Jumps are expensive, compared to logic
810   setJumpIsExpensive();
811 
812   // We can use any register for comparisons
813   setHasMultipleConditionRegisters();
814 
815   setTargetDAGCombine(ISD::AND);
816   setTargetDAGCombine(ISD::OR);
817   setTargetDAGCombine(ISD::XOR);
818   if (Subtarget.hasStdExtV()) {
819     setTargetDAGCombine(ISD::FCOPYSIGN);
820     setTargetDAGCombine(ISD::MGATHER);
821     setTargetDAGCombine(ISD::MSCATTER);
822     setTargetDAGCombine(ISD::SRA);
823     setTargetDAGCombine(ISD::SRL);
824     setTargetDAGCombine(ISD::SHL);
825   }
826 }
827 
828 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
829                                             LLVMContext &Context,
830                                             EVT VT) const {
831   if (!VT.isVector())
832     return getPointerTy(DL);
833   if (Subtarget.hasStdExtV() &&
834       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
835     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
836   return VT.changeVectorElementTypeToInteger();
837 }
838 
// VP (vector-predicated) intrinsics carry their explicit vector length in a
// scalar of XLEN width.
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}
842 
// Describe the memory access performed by RISCV target intrinsics so the
// DAG builder can attach an accurate MachineMemOperand. Returns true (and
// fills in \p Info) for intrinsics that touch memory, false otherwise.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  // The masked atomic intrinsics all take a pointer as their first argument
  // and both read and write the pointee.
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Access width is the pointee type (i32 for these intrinsics).
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // Atomics both load and store; MOVolatile keeps the access from being
    // reordered or deleted.
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}
870 
871 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
872                                                 const AddrMode &AM, Type *Ty,
873                                                 unsigned AS,
874                                                 Instruction *I) const {
875   // No global is ever allowed as a base.
876   if (AM.BaseGV)
877     return false;
878 
879   // Require a 12-bit signed offset.
880   if (!isInt<12>(AM.BaseOffs))
881     return false;
882 
883   switch (AM.Scale) {
884   case 0: // "r+i" or just "i", depending on HasBaseReg.
885     break;
886   case 1:
887     if (!AM.HasBaseReg) // allow "r+i".
888       break;
889     return false; // disallow "r+r" or "r+r+i".
890   default:
891     return false;
892   }
893 
894   return true;
895 }
896 
// Compare immediates must fit the 12-bit signed immediate field.
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
900 
// Add immediates must fit the 12-bit signed immediate field.
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}
904 
905 // On RV32, 64-bit integers are split into their high and low parts and held
906 // in two different registers, so the trunc is free since the low register can
907 // just be used.
908 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
909   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
910     return false;
911   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
912   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
913   return (SrcBits == 64 && DestBits == 32);
914 }
915 
916 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
917   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
918       !SrcVT.isInteger() || !DstVT.isInteger())
919     return false;
920   unsigned SrcBits = SrcVT.getSizeInBits();
921   unsigned DestBits = DstVT.getSizeInBits();
922   return (SrcBits == 64 && DestBits == 32);
923 }
924 
925 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
926   // Zexts are free if they can be combined with a load.
927   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
928     EVT MemVT = LD->getMemoryVT();
929     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
930          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
931         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
932          LD->getExtensionType() == ISD::ZEXTLOAD))
933       return true;
934   }
935 
936   return TargetLowering::isZExtFree(Val, VT2);
937 }
938 
939 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
940   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
941 }
942 
// Cttz is cheap to speculate only when Zbb provides a native instruction.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}
946 
// Ctlz is cheap to speculate only when Zbb provides a native instruction.
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}
950 
951 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
952                                        bool ForCodeSize) const {
953   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
954     return false;
955   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
956     return false;
957   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
958     return false;
959   if (Imm.isNegZero())
960     return false;
961   return Imm.isZero();
962 }
963 
964 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
965   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
966          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
967          (VT == MVT::f64 && Subtarget.hasStdExtD());
968 }
969 
970 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
971                                                       CallingConv::ID CC,
972                                                       EVT VT) const {
973   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
974   // end up using a GPR but that will be decided based on ABI.
975   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
976     return MVT::f32;
977 
978   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
979 }
980 
981 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
982                                                            CallingConv::ID CC,
983                                                            EVT VT) const {
984   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
985   // end up using a GPR but that will be decided based on ABI.
986   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
987     return 1;
988 
989   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
990 }
991 
992 // Changes the condition code and swaps operands if necessary, so the SetCC
993 // operation matches one of the comparisons supported directly by branches
994 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
995 // with 1/-1.
996 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
997                                     ISD::CondCode &CC, SelectionDAG &DAG) {
998   // Convert X > -1 to X >= 0.
999   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1000     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1001     CC = ISD::SETGE;
1002     return;
1003   }
1004   // Convert X < 1 to 0 >= X.
1005   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1006     RHS = LHS;
1007     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1008     CC = ISD::SETGE;
1009     return;
1010   }
1011 
1012   switch (CC) {
1013   default:
1014     break;
1015   case ISD::SETGT:
1016   case ISD::SETLE:
1017   case ISD::SETUGT:
1018   case ISD::SETULE:
1019     CC = ISD::getSetCCSwappedOperands(CC);
1020     std::swap(LHS, RHS);
1021     break;
1022   }
1023 }
1024 
1025 // Return the RISC-V branch opcode that matches the given DAG integer
1026 // condition code. The CondCode must be one of those supported by the RISC-V
1027 // ISA (see translateSetCCForBranch).
1028 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
1029   switch (CC) {
1030   default:
1031     llvm_unreachable("Unsupported CondCode");
1032   case ISD::SETEQ:
1033     return RISCV::BEQ;
1034   case ISD::SETNE:
1035     return RISCV::BNE;
1036   case ISD::SETLT:
1037     return RISCV::BLT;
1038   case ISD::SETGE:
1039     return RISCV::BGE;
1040   case ISD::SETULT:
1041     return RISCV::BLTU;
1042   case ISD::SETUGE:
1043     return RISCV::BGEU;
1044   }
1045 }
1046 
1047 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1048   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1049   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1050   if (VT.getVectorElementType() == MVT::i1)
1051     KnownSize *= 8;
1052 
1053   switch (KnownSize) {
1054   default:
1055     llvm_unreachable("Invalid LMUL.");
1056   case 8:
1057     return RISCVII::VLMUL::LMUL_F8;
1058   case 16:
1059     return RISCVII::VLMUL::LMUL_F4;
1060   case 32:
1061     return RISCVII::VLMUL::LMUL_F2;
1062   case 64:
1063     return RISCVII::VLMUL::LMUL_1;
1064   case 128:
1065     return RISCVII::VLMUL::LMUL_2;
1066   case 256:
1067     return RISCVII::VLMUL::LMUL_4;
1068   case 512:
1069     return RISCVII::VLMUL::LMUL_8;
1070   }
1071 }
1072 
1073 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1074   switch (LMul) {
1075   default:
1076     llvm_unreachable("Invalid LMUL.");
1077   case RISCVII::VLMUL::LMUL_F8:
1078   case RISCVII::VLMUL::LMUL_F4:
1079   case RISCVII::VLMUL::LMUL_F2:
1080   case RISCVII::VLMUL::LMUL_1:
1081     return RISCV::VRRegClassID;
1082   case RISCVII::VLMUL::LMUL_2:
1083     return RISCV::VRM2RegClassID;
1084   case RISCVII::VLMUL::LMUL_4:
1085     return RISCV::VRM4RegClassID;
1086   case RISCVII::VLMUL::LMUL_8:
1087     return RISCV::VRM8RegClassID;
1088   }
1089 }
1090 
1091 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1092   RISCVII::VLMUL LMUL = getLMUL(VT);
1093   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1094       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1095       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1096       LMUL == RISCVII::VLMUL::LMUL_1) {
1097     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1098                   "Unexpected subreg numbering");
1099     return RISCV::sub_vrm1_0 + Index;
1100   }
1101   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1102     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1103                   "Unexpected subreg numbering");
1104     return RISCV::sub_vrm2_0 + Index;
1105   }
1106   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1107     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1108                   "Unexpected subreg numbering");
1109     return RISCV::sub_vrm4_0 + Index;
1110   }
1111   llvm_unreachable("Invalid vector type.");
1112 }
1113 
1114 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1115   if (VT.getVectorElementType() == MVT::i1)
1116     return RISCV::VRRegClassID;
1117   return getRegClassIDForLMUL(getLMUL(VT));
1118 }
1119 
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on the register-class IDs decreasing as LMUL
  // decreases.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the vector type, decide whether the element index falls in the
      // high or low half, and compose the corresponding subregister index.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // When descending into the high half, rebase the element index so it
      // is relative to that half.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
1155 
1156 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1157 // stores for those types.
1158 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1159   return !Subtarget.useRVVForFixedLengthVectors() ||
1160          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1161 }
1162 
// Decide whether the given fixed-length vector type should be lowered using
// the scalable RVV instruction set on this subtarget, based on element-type
// support, overall size against the minimum VLEN, and the LMUL budget.
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with an equivalent number of
  // elements to avoid legalization issues. Therefore -- since we don't have
  // v512i8/v512i16/etc -- the longest fixed-length vector type we support has
  // 256 elements.
  if (VT.getVectorNumElements() > 256)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    // Scale MinVLen down so the LMul computation below accounts for mask
    // bits being packed 8 per byte. NOTE(review): assumes the size-in-bits
    // of an i1 vector equals its element count — confirm against MVT.
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    // Scalarizing f16 requires the Zfh extension.
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  // Number of vector registers (LMUL) this type would occupy.
  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
1220 
// Public wrapper over the file-local helper, supplying this target's
// subtarget.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
1224 
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types, but we can't have a fractional LMUL with denominator
    // less than 64/SEW.
    unsigned NumElts =
        divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
1256 
1257 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1258                                             const RISCVSubtarget &Subtarget) {
1259   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1260                                           Subtarget);
1261 }
1262 
// Member wrapper: dispatch to the file-local helper with this lowering's
// subtarget.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
1266 
1267 // Grow V to consume an entire RVV register.
1268 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1269                                        const RISCVSubtarget &Subtarget) {
1270   assert(VT.isScalableVector() &&
1271          "Expected to convert into a scalable vector!");
1272   assert(V.getValueType().isFixedLengthVector() &&
1273          "Expected a fixed length vector operand!");
1274   SDLoc DL(V);
1275   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1276   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1277 }
1278 
1279 // Shrink V so it's just big enough to maintain a VT's worth of data.
1280 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1281                                          const RISCVSubtarget &Subtarget) {
1282   assert(VT.isFixedLengthVector() &&
1283          "Expected to convert into a fixed length vector!");
1284   assert(V.getValueType().isScalableVector() &&
1285          "Expected a scalable vector operand!");
1286   SDLoc DL(V);
1287   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1288   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1289 }
1290 
1291 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1292 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1293 // the vector type that it is contained in.
1294 static std::pair<SDValue, SDValue>
1295 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1296                 const RISCVSubtarget &Subtarget) {
1297   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1298   MVT XLenVT = Subtarget.getXLenVT();
1299   SDValue VL = VecVT.isFixedLengthVector()
1300                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1301                    : DAG.getRegister(RISCV::X0, XLenVT);
1302   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1303   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1304   return {Mask, VL};
1305 }
1306 
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  // A scalable type acts as its own container.
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
1314 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally false; VT and DefinedValues are intentionally ignored.
  return false;
}
1327 
1328 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1329   // Only splats are currently supported.
1330   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1331     return true;
1332 
1333   return false;
1334 }
1335 
1336 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1337                                  const RISCVSubtarget &Subtarget) {
1338   MVT VT = Op.getSimpleValueType();
1339   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1340 
1341   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1342 
1343   SDLoc DL(Op);
1344   SDValue Mask, VL;
1345   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1346 
1347   unsigned Opc =
1348       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1349   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1350   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1351 }
1352 
1353 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1354                                  const RISCVSubtarget &Subtarget) {
1355   MVT VT = Op.getSimpleValueType();
1356   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1357 
1358   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1359 
1360   SDLoc DL(Op);
1361   SDValue Mask, VL;
1362   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1363 
1364   MVT XLenVT = Subtarget.getXLenVT();
1365   unsigned NumElts = Op.getNumOperands();
1366 
1367   if (VT.getVectorElementType() == MVT::i1) {
1368     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1369       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1370       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1371     }
1372 
1373     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1374       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1375       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1376     }
1377 
1378     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1379     // scalar integer chunks whose bit-width depends on the number of mask
1380     // bits and XLEN.
1381     // First, determine the most appropriate scalar integer type to use. This
1382     // is at most XLenVT, but may be shrunk to a smaller vector element type
1383     // according to the size of the final vector - use i8 chunks rather than
1384     // XLenVT if we're producing a v8i1. This results in more consistent
1385     // codegen across RV32 and RV64.
1386     unsigned NumViaIntegerBits =
1387         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1388     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1389       // If we have to use more than one INSERT_VECTOR_ELT then this
1390       // optimization is likely to increase code size; avoid peforming it in
1391       // such a case. We can use a load from a constant pool in this case.
1392       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1393         return SDValue();
1394       // Now we can create our integer vector type. Note that it may be larger
1395       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1396       MVT IntegerViaVecVT =
1397           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1398                            divideCeil(NumElts, NumViaIntegerBits));
1399 
1400       uint64_t Bits = 0;
1401       unsigned BitPos = 0, IntegerEltIdx = 0;
1402       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1403 
1404       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1405         // Once we accumulate enough bits to fill our scalar type, insert into
1406         // our vector and clear our accumulated data.
1407         if (I != 0 && I % NumViaIntegerBits == 0) {
1408           if (NumViaIntegerBits <= 32)
1409             Bits = SignExtend64(Bits, 32);
1410           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1411           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1412                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1413           Bits = 0;
1414           BitPos = 0;
1415           IntegerEltIdx++;
1416         }
1417         SDValue V = Op.getOperand(I);
1418         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1419         Bits |= ((uint64_t)BitValue << BitPos);
1420       }
1421 
1422       // Insert the (remaining) scalar value into position in our integer
1423       // vector type.
1424       if (NumViaIntegerBits <= 32)
1425         Bits = SignExtend64(Bits, 32);
1426       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1427       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1428                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1429 
1430       if (NumElts < NumViaIntegerBits) {
1431         // If we're producing a smaller vector than our minimum legal integer
1432         // type, bitcast to the equivalent (known-legal) mask type, and extract
1433         // our final mask.
1434         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1435         Vec = DAG.getBitcast(MVT::v8i1, Vec);
1436         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1437                           DAG.getConstant(0, DL, XLenVT));
1438       } else {
1439         // Else we must have produced an integer type with the same size as the
1440         // mask type; bitcast for the final result.
1441         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1442         Vec = DAG.getBitcast(VT, Vec);
1443       }
1444 
1445       return Vec;
1446     }
1447 
1448     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1449     // vector type, we have a legal equivalently-sized i8 type, so we can use
1450     // that.
1451     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1452     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1453 
1454     SDValue WideVec;
1455     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1456       // For a splat, perform a scalar truncate before creating the wider
1457       // vector.
1458       assert(Splat.getValueType() == XLenVT &&
1459              "Unexpected type for i1 splat value");
1460       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1461                           DAG.getConstant(1, DL, XLenVT));
1462       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1463     } else {
1464       SmallVector<SDValue, 8> Ops(Op->op_values());
1465       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1466       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1467       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1468     }
1469 
1470     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1471   }
1472 
1473   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1474     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1475                                         : RISCVISD::VMV_V_X_VL;
1476     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1477     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1478   }
1479 
1480   // Try and match an index sequence, which we can lower directly to the vid
1481   // instruction. An all-undef vector is matched by getSplatValue, above.
1482   if (VT.isInteger()) {
1483     bool IsVID = true;
1484     for (unsigned I = 0; I < NumElts && IsVID; I++)
1485       IsVID &= Op.getOperand(I).isUndef() ||
1486                (isa<ConstantSDNode>(Op.getOperand(I)) &&
1487                 Op.getConstantOperandVal(I) == I);
1488 
1489     if (IsVID) {
1490       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1491       return convertFromScalableVector(VT, VID, DAG, Subtarget);
1492     }
1493   }
1494 
1495   // Attempt to detect "hidden" splats, which only reveal themselves as splats
1496   // when re-interpreted as a vector with a larger element type. For example,
1497   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1498   // could be instead splat as
1499   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
1500   // TODO: This optimization could also work on non-constant splats, but it
1501   // would require bit-manipulation instructions to construct the splat value.
1502   SmallVector<SDValue> Sequence;
1503   unsigned EltBitSize = VT.getScalarSizeInBits();
1504   const auto *BV = cast<BuildVectorSDNode>(Op);
1505   if (VT.isInteger() && EltBitSize < 64 &&
1506       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
1507       BV->getRepeatedSequence(Sequence) &&
1508       (Sequence.size() * EltBitSize) <= 64) {
1509     unsigned SeqLen = Sequence.size();
1510     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1511     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1512     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1513             ViaIntVT == MVT::i64) &&
1514            "Unexpected sequence type");
1515 
1516     unsigned EltIdx = 0;
1517     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1518     uint64_t SplatValue = 0;
1519     // Construct the amalgamated value which can be splatted as this larger
1520     // vector type.
1521     for (const auto &SeqV : Sequence) {
1522       if (!SeqV.isUndef())
1523         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1524                        << (EltIdx * EltBitSize));
1525       EltIdx++;
1526     }
1527 
1528     // On RV64, sign-extend from 32 to 64 bits where possible in order to
1529     // achieve better constant materializion.
1530     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1531       SplatValue = SignExtend64(SplatValue, 32);
1532 
1533     // Since we can't introduce illegal i64 types at this stage, we can only
1534     // perform an i64 splat on RV32 if it is its own sign-extended value. That
1535     // way we can use RVV instructions to splat.
1536     assert((ViaIntVT.bitsLE(XLenVT) ||
1537             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1538            "Unexpected bitcast sequence");
1539     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1540       SDValue ViaVL =
1541           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1542       MVT ViaContainerVT =
1543           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1544       SDValue Splat =
1545           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1546                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1547       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1548       return DAG.getBitcast(VT, Splat);
1549     }
1550   }
1551 
1552   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1553   // which constitute a large proportion of the elements. In such cases we can
1554   // splat a vector with the dominant element and make up the shortfall with
1555   // INSERT_VECTOR_ELTs.
1556   // Note that this includes vectors of 2 elements by association. The
1557   // upper-most element is the "dominant" one, allowing us to use a splat to
1558   // "insert" the upper element, and an insert of the lower element at position
1559   // 0, which improves codegen.
1560   SDValue DominantValue;
1561   unsigned MostCommonCount = 0;
1562   DenseMap<SDValue, unsigned> ValueCounts;
1563   unsigned NumUndefElts =
1564       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1565 
1566   for (SDValue V : Op->op_values()) {
1567     if (V.isUndef())
1568       continue;
1569 
1570     ValueCounts.insert(std::make_pair(V, 0));
1571     unsigned &Count = ValueCounts[V];
1572 
1573     // Is this value dominant? In case of a tie, prefer the highest element as
1574     // it's cheaper to insert near the beginning of a vector than it is at the
1575     // end.
1576     if (++Count >= MostCommonCount) {
1577       DominantValue = V;
1578       MostCommonCount = Count;
1579     }
1580   }
1581 
1582   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1583   unsigned NumDefElts = NumElts - NumUndefElts;
1584   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1585 
1586   // Don't perform this optimization when optimizing for size, since
1587   // materializing elements and inserting them tends to cause code bloat.
1588   if (!DAG.shouldOptForSize() &&
1589       ((MostCommonCount > DominantValueCountThreshold) ||
1590        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1591     // Start by splatting the most common element.
1592     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1593 
1594     DenseSet<SDValue> Processed{DominantValue};
1595     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1596     for (const auto &OpIdx : enumerate(Op->ops())) {
1597       const SDValue &V = OpIdx.value();
1598       if (V.isUndef() || !Processed.insert(V).second)
1599         continue;
1600       if (ValueCounts[V] == 1) {
1601         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1602                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
1603       } else {
1604         // Blend in all instances of this value using a VSELECT, using a
1605         // mask where each bit signals whether that element is the one
1606         // we're after.
1607         SmallVector<SDValue> Ops;
1608         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1609           return DAG.getConstant(V == V1, DL, XLenVT);
1610         });
1611         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1612                           DAG.getBuildVector(SelMaskTy, DL, Ops),
1613                           DAG.getSplatBuildVector(VT, DL, V), Vec);
1614       }
1615     }
1616 
1617     return Vec;
1618   }
1619 
1620   return SDValue();
1621 }
1622 
1623 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
1624                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1625   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1626     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1627     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1628     // If Hi constant is all the same sign bit as Lo, lower this as a custom
1629     // node in order to try and match RVV vector/scalar instructions.
1630     if ((LoC >> 31) == HiC)
1631       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1632   }
1633 
1634   // Fall back to a stack store and stride x0 vector load.
1635   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
1636 }
1637 
1638 // Called by type legalization to handle splat of i64 on RV32.
1639 // FIXME: We can optimize this when the type has sign or zero bits in one
1640 // of the halves.
1641 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1642                                    SDValue VL, SelectionDAG &DAG) {
1643   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
1644   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1645                            DAG.getConstant(0, DL, MVT::i32));
1646   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1647                            DAG.getConstant(1, DL, MVT::i32));
1648   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
1649 }
1650 
1651 // This function lowers a splat of a scalar operand Splat with the vector
1652 // length VL. It ensures the final sequence is type legal, which is useful when
1653 // lowering a splat after type legalization.
1654 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
1655                                 SelectionDAG &DAG,
1656                                 const RISCVSubtarget &Subtarget) {
1657   if (VT.isFloatingPoint())
1658     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
1659 
1660   MVT XLenVT = Subtarget.getXLenVT();
1661 
1662   // Simplest case is that the operand needs to be promoted to XLenVT.
1663   if (Scalar.getValueType().bitsLE(XLenVT)) {
1664     // If the operand is a constant, sign extend to increase our chances
1665     // of being able to use a .vi instruction. ANY_EXTEND would become a
1666     // a zero extend and the simm5 check in isel would fail.
1667     // FIXME: Should we ignore the upper bits in isel instead?
1668     unsigned ExtOpc =
1669         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
1670     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
1671     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
1672   }
1673 
1674   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
1675          "Unexpected scalar for splat lowering!");
1676 
1677   // Otherwise use the more complicated splatting algorithm.
1678   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
1679 }
1680 
// Lower a fixed-length VECTOR_SHUFFLE for RVV. Strategy, in order of
// preference:
//   1. A splat shuffle: either fold a splatted simple load into a strided
//      (stride-x0) or scalar load, or emit a vrgather.vx on the splat lane.
//   2. A "select-like" shuffle (every destination element comes from the
//      same index in one of the two sources): emit a VSELECT.
//   3. General case: one vrgather.vv per source operand, blended together
//      with a VSELECT_VL mask.
// Returns SDValue() when the shuffle cannot be handled (e.g. >256-element
// i8 vectors, where the i8 index type can't address all lanes).
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  // Scalable container type used for the actual RVV operations.
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue TrueMask, VL;
  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    // Lane < 0 means the splat value is undef; fall through to the generic
    // paths below in that case.
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        // Select the concat operand holding the splat lane, and rebase the
        // lane offset relative to that operand.
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        // Convert the lane index into a byte offset from the load's base.
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                                   TypeSize::Fixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          // Stride register x0 makes every element load from the same
          // address, producing the splat directly.
          SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT), VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          // Keep the chain consistent with the original load's ordering.
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          // Integer elements narrower than XLen are sign-extended so the
          // scalar can feed VMV_V_X_VL directly.
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      // Not a foldable load: splat the lane with a register-indexed gather.
      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vectors.
  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : SVN->getMask()) {
    // Mask bit is true when the element comes from V1 (before any swap),
    // XORed with InvertMask to account for operand swapping above.
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHS = MaskIndex < (int)NumElts;
      // For "undef" elements of -1, shuffle in element 0 instead.
      GatherIndicesLHS.push_back(
          DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
      // TODO: If we're masking out unused elements anyway, it might produce
      // better code if we use the most-common element index instead of 0.
      GatherIndicesRHS.push_back(
          DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
    }
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    // V1 is a splat: materialize it directly rather than gathering.
    Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
    LHSIndices =
        convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    Gather =
        DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
    RHSIndices =
        convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);

    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
    V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
    // Mask bits select V2's gather where true, the V1 gather where false
    // (operands may have been swapped above; the mask was built to match).
    Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
                         Gather, VL);
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
1873 
1874 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
1875                                      SDLoc DL, SelectionDAG &DAG,
1876                                      const RISCVSubtarget &Subtarget) {
1877   if (VT.isScalableVector())
1878     return DAG.getFPExtendOrRound(Op, DL, VT);
1879   assert(VT.isFixedLengthVector() &&
1880          "Unexpected value type for RVV FP extend/round lowering");
1881   SDValue Mask, VL;
1882   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1883   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
1884                         ? RISCVISD::FP_EXTEND_VL
1885                         : RISCVISD::FP_ROUND_VL;
1886   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1887 }
1888 
1889 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1890                                             SelectionDAG &DAG) const {
1891   switch (Op.getOpcode()) {
1892   default:
1893     report_fatal_error("unimplemented operand");
1894   case ISD::GlobalAddress:
1895     return lowerGlobalAddress(Op, DAG);
1896   case ISD::BlockAddress:
1897     return lowerBlockAddress(Op, DAG);
1898   case ISD::ConstantPool:
1899     return lowerConstantPool(Op, DAG);
1900   case ISD::JumpTable:
1901     return lowerJumpTable(Op, DAG);
1902   case ISD::GlobalTLSAddress:
1903     return lowerGlobalTLSAddress(Op, DAG);
1904   case ISD::SELECT:
1905     return lowerSELECT(Op, DAG);
1906   case ISD::BRCOND:
1907     return lowerBRCOND(Op, DAG);
1908   case ISD::VASTART:
1909     return lowerVASTART(Op, DAG);
1910   case ISD::FRAMEADDR:
1911     return lowerFRAMEADDR(Op, DAG);
1912   case ISD::RETURNADDR:
1913     return lowerRETURNADDR(Op, DAG);
1914   case ISD::SHL_PARTS:
1915     return lowerShiftLeftParts(Op, DAG);
1916   case ISD::SRA_PARTS:
1917     return lowerShiftRightParts(Op, DAG, true);
1918   case ISD::SRL_PARTS:
1919     return lowerShiftRightParts(Op, DAG, false);
1920   case ISD::BITCAST: {
1921     SDLoc DL(Op);
1922     EVT VT = Op.getValueType();
1923     SDValue Op0 = Op.getOperand(0);
1924     EVT Op0VT = Op0.getValueType();
1925     MVT XLenVT = Subtarget.getXLenVT();
1926     if (VT.isFixedLengthVector()) {
1927       // We can handle fixed length vector bitcasts with a simple replacement
1928       // in isel.
1929       if (Op0VT.isFixedLengthVector())
1930         return Op;
1931       // When bitcasting from scalar to fixed-length vector, insert the scalar
1932       // into a one-element vector of the result type, and perform a vector
1933       // bitcast.
1934       if (!Op0VT.isVector()) {
1935         auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
1936         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
1937                                               DAG.getUNDEF(BVT), Op0,
1938                                               DAG.getConstant(0, DL, XLenVT)));
1939       }
1940       return SDValue();
1941     }
1942     // Custom-legalize bitcasts from fixed-length vector types to scalar types
1943     // thus: bitcast the vector to a one-element vector type whose element type
1944     // is the same as the result type, and extract the first element.
1945     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
1946       LLVMContext &Context = *DAG.getContext();
1947       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
1948       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
1949                          DAG.getConstant(0, DL, XLenVT));
1950     }
1951     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
1952       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
1953       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1954       return FPConv;
1955     }
1956     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
1957         Subtarget.hasStdExtF()) {
1958       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1959       SDValue FPConv =
1960           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1961       return FPConv;
1962     }
1963     return SDValue();
1964   }
1965   case ISD::INTRINSIC_WO_CHAIN:
1966     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1967   case ISD::INTRINSIC_W_CHAIN:
1968     return LowerINTRINSIC_W_CHAIN(Op, DAG);
1969   case ISD::BSWAP:
1970   case ISD::BITREVERSE: {
1971     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
1972     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1973     MVT VT = Op.getSimpleValueType();
1974     SDLoc DL(Op);
1975     // Start with the maximum immediate value which is the bitwidth - 1.
1976     unsigned Imm = VT.getSizeInBits() - 1;
1977     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1978     if (Op.getOpcode() == ISD::BSWAP)
1979       Imm &= ~0x7U;
1980     return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
1981                        DAG.getConstant(Imm, DL, VT));
1982   }
1983   case ISD::FSHL:
1984   case ISD::FSHR: {
1985     MVT VT = Op.getSimpleValueType();
1986     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1987     SDLoc DL(Op);
1988     if (Op.getOperand(2).getOpcode() == ISD::Constant)
1989       return Op;
1990     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
1991     // use log(XLen) bits. Mask the shift amount accordingly.
1992     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1993     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1994                                 DAG.getConstant(ShAmtWidth, DL, VT));
1995     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1996     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1997   }
1998   case ISD::TRUNCATE: {
1999     SDLoc DL(Op);
2000     MVT VT = Op.getSimpleValueType();
2001     // Only custom-lower vector truncates
2002     if (!VT.isVector())
2003       return Op;
2004 
2005     // Truncates to mask types are handled differently
2006     if (VT.getVectorElementType() == MVT::i1)
2007       return lowerVectorMaskTrunc(Op, DAG);
2008 
2009     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2010     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2011     // truncate by one power of two at a time.
2012     MVT DstEltVT = VT.getVectorElementType();
2013 
2014     SDValue Src = Op.getOperand(0);
2015     MVT SrcVT = Src.getSimpleValueType();
2016     MVT SrcEltVT = SrcVT.getVectorElementType();
2017 
2018     assert(DstEltVT.bitsLT(SrcEltVT) &&
2019            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2020            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2021            "Unexpected vector truncate lowering");
2022 
2023     MVT ContainerVT = SrcVT;
2024     if (SrcVT.isFixedLengthVector()) {
2025       ContainerVT = getContainerForFixedLengthVector(SrcVT);
2026       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2027     }
2028 
2029     SDValue Result = Src;
2030     SDValue Mask, VL;
2031     std::tie(Mask, VL) =
2032         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2033     LLVMContext &Context = *DAG.getContext();
2034     const ElementCount Count = ContainerVT.getVectorElementCount();
2035     do {
2036       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2037       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2038       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2039                            Mask, VL);
2040     } while (SrcEltVT != DstEltVT);
2041 
2042     if (SrcVT.isFixedLengthVector())
2043       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2044 
2045     return Result;
2046   }
2047   case ISD::ANY_EXTEND:
2048   case ISD::ZERO_EXTEND:
2049     if (Op.getOperand(0).getValueType().isVector() &&
2050         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2051       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2052     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2053   case ISD::SIGN_EXTEND:
2054     if (Op.getOperand(0).getValueType().isVector() &&
2055         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2056       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2057     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2058   case ISD::SPLAT_VECTOR_PARTS:
2059     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2060   case ISD::INSERT_VECTOR_ELT:
2061     return lowerINSERT_VECTOR_ELT(Op, DAG);
2062   case ISD::EXTRACT_VECTOR_ELT:
2063     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2064   case ISD::VSCALE: {
2065     MVT VT = Op.getSimpleValueType();
2066     SDLoc DL(Op);
2067     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
2068     // We define our scalable vector types for lmul=1 to use a 64 bit known
2069     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
2070     // vscale as VLENB / 8.
2071     assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2072     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2073                                  DAG.getConstant(3, DL, VT));
2074     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2075   }
2076   case ISD::FP_EXTEND: {
2077     // RVV can only do fp_extend to types double the size as the source. We
2078     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2079     // via f32.
2080     SDLoc DL(Op);
2081     MVT VT = Op.getSimpleValueType();
2082     SDValue Src = Op.getOperand(0);
2083     MVT SrcVT = Src.getSimpleValueType();
2084 
2085     // Prepare any fixed-length vector operands.
2086     MVT ContainerVT = VT;
2087     if (SrcVT.isFixedLengthVector()) {
2088       ContainerVT = getContainerForFixedLengthVector(VT);
2089       MVT SrcContainerVT =
2090           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2091       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2092     }
2093 
2094     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2095         SrcVT.getVectorElementType() != MVT::f16) {
2096       // For scalable vectors, we only need to close the gap between
2097       // vXf16->vXf64.
2098       if (!VT.isFixedLengthVector())
2099         return Op;
2100       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2101       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2102       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2103     }
2104 
2105     MVT InterVT = VT.changeVectorElementType(MVT::f32);
2106     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2107     SDValue IntermediateExtend = getRVVFPExtendOrRound(
2108         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2109 
2110     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2111                                            DL, DAG, Subtarget);
2112     if (VT.isFixedLengthVector())
2113       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2114     return Extend;
2115   }
2116   case ISD::FP_ROUND: {
2117     // RVV can only do fp_round to types half the size as the source. We
2118     // custom-lower f64->f16 rounds via RVV's round-to-odd float
2119     // conversion instruction.
2120     SDLoc DL(Op);
2121     MVT VT = Op.getSimpleValueType();
2122     SDValue Src = Op.getOperand(0);
2123     MVT SrcVT = Src.getSimpleValueType();
2124 
2125     // Prepare any fixed-length vector operands.
2126     MVT ContainerVT = VT;
2127     if (VT.isFixedLengthVector()) {
2128       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2129       ContainerVT =
2130           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2131       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2132     }
2133 
2134     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2135         SrcVT.getVectorElementType() != MVT::f64) {
2136       // For scalable vectors, we only need to close the gap between
2137       // vXf64<->vXf16.
2138       if (!VT.isFixedLengthVector())
2139         return Op;
2140       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2141       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2142       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2143     }
2144 
2145     SDValue Mask, VL;
2146     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2147 
2148     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2149     SDValue IntermediateRound =
2150         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2151     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2152                                           DL, DAG, Subtarget);
2153 
2154     if (VT.isFixedLengthVector())
2155       return convertFromScalableVector(VT, Round, DAG, Subtarget);
2156     return Round;
2157   }
2158   case ISD::FP_TO_SINT:
2159   case ISD::FP_TO_UINT:
2160   case ISD::SINT_TO_FP:
2161   case ISD::UINT_TO_FP: {
2162     // RVV can only do fp<->int conversions to types half/double the size as
2163     // the source. We custom-lower any conversions that do two hops into
2164     // sequences.
2165     MVT VT = Op.getSimpleValueType();
2166     if (!VT.isVector())
2167       return Op;
2168     SDLoc DL(Op);
2169     SDValue Src = Op.getOperand(0);
2170     MVT EltVT = VT.getVectorElementType();
2171     MVT SrcVT = Src.getSimpleValueType();
2172     MVT SrcEltVT = SrcVT.getVectorElementType();
2173     unsigned EltSize = EltVT.getSizeInBits();
2174     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2175     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2176            "Unexpected vector element types");
2177 
2178     bool IsInt2FP = SrcEltVT.isInteger();
2179     // Widening conversions
2180     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2181       if (IsInt2FP) {
2182         // Do a regular integer sign/zero extension then convert to float.
2183         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
2184                                       VT.getVectorElementCount());
2185         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2186                                  ? ISD::ZERO_EXTEND
2187                                  : ISD::SIGN_EXTEND;
2188         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2189         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2190       }
2191       // FP2Int
2192       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2193       // Do one doubling fp_extend then complete the operation by converting
2194       // to int.
2195       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2196       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2197       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2198     }
2199 
2200     // Narrowing conversions
2201     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2202       if (IsInt2FP) {
2203         // One narrowing int_to_fp, then an fp_round.
2204         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2205         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2206         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2207         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2208       }
2209       // FP2Int
2210       // One narrowing fp_to_int, then truncate the integer. If the float isn't
2211       // representable by the integer, the result is poison.
2212       MVT IVecVT =
2213           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
2214                            VT.getVectorElementCount());
2215       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2216       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2217     }
2218 
2219     // Scalable vectors can exit here. Patterns will handle equally-sized
2220     // conversions halving/doubling ones.
2221     if (!VT.isFixedLengthVector())
2222       return Op;
2223 
2224     // For fixed-length vectors we lower to a custom "VL" node.
2225     unsigned RVVOpc = 0;
2226     switch (Op.getOpcode()) {
2227     default:
2228       llvm_unreachable("Impossible opcode");
2229     case ISD::FP_TO_SINT:
2230       RVVOpc = RISCVISD::FP_TO_SINT_VL;
2231       break;
2232     case ISD::FP_TO_UINT:
2233       RVVOpc = RISCVISD::FP_TO_UINT_VL;
2234       break;
2235     case ISD::SINT_TO_FP:
2236       RVVOpc = RISCVISD::SINT_TO_FP_VL;
2237       break;
2238     case ISD::UINT_TO_FP:
2239       RVVOpc = RISCVISD::UINT_TO_FP_VL;
2240       break;
2241     }
2242 
2243     MVT ContainerVT, SrcContainerVT;
2244     // Derive the reference container type from the larger vector type.
2245     if (SrcEltSize > EltSize) {
2246       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2247       ContainerVT =
2248           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2249     } else {
2250       ContainerVT = getContainerForFixedLengthVector(VT);
2251       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2252     }
2253 
2254     SDValue Mask, VL;
2255     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2256 
2257     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2258     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2259     return convertFromScalableVector(VT, Src, DAG, Subtarget);
2260   }
2261   case ISD::VECREDUCE_ADD:
2262   case ISD::VECREDUCE_UMAX:
2263   case ISD::VECREDUCE_SMAX:
2264   case ISD::VECREDUCE_UMIN:
2265   case ISD::VECREDUCE_SMIN:
2266     return lowerVECREDUCE(Op, DAG);
2267   case ISD::VECREDUCE_AND:
2268   case ISD::VECREDUCE_OR:
2269   case ISD::VECREDUCE_XOR:
2270     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2271       return lowerVectorMaskVECREDUCE(Op, DAG);
2272     return lowerVECREDUCE(Op, DAG);
2273   case ISD::VECREDUCE_FADD:
2274   case ISD::VECREDUCE_SEQ_FADD:
2275   case ISD::VECREDUCE_FMIN:
2276   case ISD::VECREDUCE_FMAX:
2277     return lowerFPVECREDUCE(Op, DAG);
2278   case ISD::INSERT_SUBVECTOR:
2279     return lowerINSERT_SUBVECTOR(Op, DAG);
2280   case ISD::EXTRACT_SUBVECTOR:
2281     return lowerEXTRACT_SUBVECTOR(Op, DAG);
2282   case ISD::STEP_VECTOR:
2283     return lowerSTEP_VECTOR(Op, DAG);
2284   case ISD::VECTOR_REVERSE:
2285     return lowerVECTOR_REVERSE(Op, DAG);
2286   case ISD::BUILD_VECTOR:
2287     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2288   case ISD::SPLAT_VECTOR:
2289     if (Op.getValueType().getVectorElementType() == MVT::i1)
2290       return lowerVectorMaskSplat(Op, DAG);
2291     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2292   case ISD::VECTOR_SHUFFLE:
2293     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2294   case ISD::CONCAT_VECTORS: {
2295     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2296     // better than going through the stack, as the default expansion does.
2297     SDLoc DL(Op);
2298     MVT VT = Op.getSimpleValueType();
2299     unsigned NumOpElts =
2300         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
2301     SDValue Vec = DAG.getUNDEF(VT);
2302     for (const auto &OpIdx : enumerate(Op->ops()))
2303       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2304                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2305     return Vec;
2306   }
2307   case ISD::LOAD:
2308     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2309   case ISD::STORE:
2310     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2311   case ISD::MLOAD:
2312     return lowerMLOAD(Op, DAG);
2313   case ISD::MSTORE:
2314     return lowerMSTORE(Op, DAG);
2315   case ISD::SETCC:
2316     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2317   case ISD::ADD:
2318     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2319   case ISD::SUB:
2320     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2321   case ISD::MUL:
2322     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2323   case ISD::MULHS:
2324     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2325   case ISD::MULHU:
2326     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2327   case ISD::AND:
2328     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2329                                               RISCVISD::AND_VL);
2330   case ISD::OR:
2331     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2332                                               RISCVISD::OR_VL);
2333   case ISD::XOR:
2334     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2335                                               RISCVISD::XOR_VL);
2336   case ISD::SDIV:
2337     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2338   case ISD::SREM:
2339     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2340   case ISD::UDIV:
2341     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2342   case ISD::UREM:
2343     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2344   case ISD::SHL:
2345     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
2346   case ISD::SRA:
2347     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
2348   case ISD::SRL:
2349     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
2350   case ISD::FADD:
2351     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2352   case ISD::FSUB:
2353     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2354   case ISD::FMUL:
2355     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2356   case ISD::FDIV:
2357     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2358   case ISD::FNEG:
2359     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2360   case ISD::FABS:
2361     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2362   case ISD::FSQRT:
2363     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2364   case ISD::FMA:
2365     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2366   case ISD::SMIN:
2367     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2368   case ISD::SMAX:
2369     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2370   case ISD::UMIN:
2371     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2372   case ISD::UMAX:
2373     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2374   case ISD::FMINNUM:
2375     return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2376   case ISD::FMAXNUM:
2377     return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2378   case ISD::ABS:
2379     return lowerABS(Op, DAG);
2380   case ISD::VSELECT:
2381     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2382   case ISD::FCOPYSIGN:
2383     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2384   case ISD::MGATHER:
2385     return lowerMGATHER(Op, DAG);
2386   case ISD::MSCATTER:
2387     return lowerMSCATTER(Op, DAG);
2388   case ISD::FLT_ROUNDS_:
2389     return lowerGET_ROUNDING(Op, DAG);
2390   case ISD::SET_ROUNDING:
2391     return lowerSET_ROUNDING(Op, DAG);
2392   case ISD::VP_ADD:
2393     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2394   case ISD::VP_SUB:
2395     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2396   case ISD::VP_MUL:
2397     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2398   case ISD::VP_SDIV:
2399     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2400   case ISD::VP_UDIV:
2401     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2402   case ISD::VP_SREM:
2403     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2404   case ISD::VP_UREM:
2405     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2406   case ISD::VP_AND:
2407     return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2408   case ISD::VP_OR:
2409     return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2410   case ISD::VP_XOR:
2411     return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2412   case ISD::VP_ASHR:
2413     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2414   case ISD::VP_LSHR:
2415     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2416   case ISD::VP_SHL:
2417     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2418   }
2419 }
2420 
2421 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2422                              SelectionDAG &DAG, unsigned Flags) {
2423   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2424 }
2425 
2426 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2427                              SelectionDAG &DAG, unsigned Flags) {
2428   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2429                                    Flags);
2430 }
2431 
2432 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2433                              SelectionDAG &DAG, unsigned Flags) {
2434   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2435                                    N->getOffset(), Flags);
2436 }
2437 
2438 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2439                              SelectionDAG &DAG, unsigned Flags) {
2440   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2441 }
2442 
/// Materialise the address of \p N (a global address, block address, constant
/// pool entry or jump table node) using an instruction sequence appropriate
/// for the current relocation model and code model. When compiling
/// position-independent code, \p IsLocal selects direct PC-relative
/// addressing over a GOT indirection.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
2483 
2484 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2485                                                 SelectionDAG &DAG) const {
2486   SDLoc DL(Op);
2487   EVT Ty = Op.getValueType();
2488   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2489   int64_t Offset = N->getOffset();
2490   MVT XLenVT = Subtarget.getXLenVT();
2491 
2492   const GlobalValue *GV = N->getGlobal();
2493   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2494   SDValue Addr = getAddr(N, DAG, IsLocal);
2495 
2496   // In order to maximise the opportunity for common subexpression elimination,
2497   // emit a separate ADD node for the global address offset instead of folding
2498   // it in the global address node. Later peephole optimisations may choose to
2499   // fold it back in when profitable.
2500   if (Offset != 0)
2501     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2502                        DAG.getConstant(Offset, DL, XLenVT));
2503   return Addr;
2504 }
2505 
2506 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2507                                                SelectionDAG &DAG) const {
2508   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2509 
2510   return getAddr(N, DAG);
2511 }
2512 
2513 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2514                                                SelectionDAG &DAG) const {
2515   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2516 
2517   return getAddr(N, DAG);
2518 }
2519 
2520 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2521                                             SelectionDAG &DAG) const {
2522   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2523 
2524   return getAddr(N, DAG);
2525 }
2526 
/// Materialise the address of the thread-local global \p N for the static
/// TLS models. With \p UseGOT set this emits the initial-exec sequence (load
/// the symbol's TP-relative offset from the GOT, then add the thread
/// pointer); otherwise it emits the local-exec sequence (link-time constant
/// offset from the thread pointer).
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer (register x4/tp).
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  // PseudoAddTPRel carries the %tprel_add operand so the linker can relax
  // the sequence.
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
2567 
/// Materialise the address of the thread-local global \p N for the dynamic
/// TLS models by computing the GOT slot address for the symbol and then
/// emitting a call to __tls_get_addr with that address as the argument.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes (and returns) a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // The first element of the pair is the call's return value.
  return LowerCallTo(CLI).first;
}
2599 
2600 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2601                                                    SelectionDAG &DAG) const {
2602   SDLoc DL(Op);
2603   EVT Ty = Op.getValueType();
2604   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2605   int64_t Offset = N->getOffset();
2606   MVT XLenVT = Subtarget.getXLenVT();
2607 
2608   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
2609 
2610   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2611       CallingConv::GHC)
2612     report_fatal_error("In GHC calling convention TLS is not supported");
2613 
2614   SDValue Addr;
2615   switch (Model) {
2616   case TLSModel::LocalExec:
2617     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2618     break;
2619   case TLSModel::InitialExec:
2620     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2621     break;
2622   case TLSModel::LocalDynamic:
2623   case TLSModel::GeneralDynamic:
2624     Addr = getDynamicTLSAddr(N, DAG);
2625     break;
2626   }
2627 
2628   // In order to maximise the opportunity for common subexpression elimination,
2629   // emit a separate ADD node for the global address offset instead of folding
2630   // it in the global address node. Later peephole optimisations may choose to
2631   // fold it back in when profitable.
2632   if (Offset != 0)
2633     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2634                        DAG.getConstant(Offset, DL, XLenVT));
2635   return Addr;
2636 }
2637 
/// Lower ISD::SELECT into RISCVISD::SELECT_CC, merging an XLenVT SETCC
/// condition into the node where possible so that the integer
/// compare-and-branch instructions can be used directly.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to SETLT
    // case for now because that is what signed saturating add/sub need.
    // FIXME: We don't need the condition to be SETLT or even a SETCC,
    // but we would probably want to swap the true/false values if the condition
    // is SETGE/SETLE to avoid an XORI.
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
        CCVal == ISD::SETLT) {
      const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
      const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
      // (select (setlt ...), C+1, C) -> C + (setlt ...), since the condition
      // materialises as 0 or 1.
      if (TrueVal - 1 == FalseVal)
        return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
      // (select (setlt ...), C-1, C) -> C - (setlt ...).
      if (TrueVal + 1 == FalseVal)
        return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
    }

    // May canonicalise LHS/RHS/CCVal in place into a form the branch
    // instructions support.
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
2692 
2693 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
2694   SDValue CondV = Op.getOperand(1);
2695   SDLoc DL(Op);
2696   MVT XLenVT = Subtarget.getXLenVT();
2697 
2698   if (CondV.getOpcode() == ISD::SETCC &&
2699       CondV.getOperand(0).getValueType() == XLenVT) {
2700     SDValue LHS = CondV.getOperand(0);
2701     SDValue RHS = CondV.getOperand(1);
2702     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
2703 
2704     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2705 
2706     SDValue TargetCC = DAG.getCondCode(CCVal);
2707     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2708                        LHS, RHS, TargetCC, Op.getOperand(2));
2709   }
2710 
2711   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2712                      CondV, DAG.getConstant(0, DL, XLenVT),
2713                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
2714 }
2715 
2716 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2717   MachineFunction &MF = DAG.getMachineFunction();
2718   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
2719 
2720   SDLoc DL(Op);
2721   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2722                                  getPointerTy(MF.getDataLayout()));
2723 
2724   // vastart just stores the address of the VarArgsFrameIndex slot into the
2725   // memory location argument.
2726   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2727   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2728                       MachinePointerInfo(SV));
2729 }
2730 
2731 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
2732                                             SelectionDAG &DAG) const {
2733   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2734   MachineFunction &MF = DAG.getMachineFunction();
2735   MachineFrameInfo &MFI = MF.getFrameInfo();
2736   MFI.setFrameAddressIsTaken(true);
2737   Register FrameReg = RI.getFrameRegister(MF);
2738   int XLenInBytes = Subtarget.getXLen() / 8;
2739 
2740   EVT VT = Op.getValueType();
2741   SDLoc DL(Op);
2742   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2743   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2744   while (Depth--) {
2745     int Offset = -(XLenInBytes * 2);
2746     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2747                               DAG.getIntPtrConstant(Offset, DL));
2748     FrameAddr =
2749         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2750   }
2751   return FrameAddr;
2752 }
2753 
2754 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
2755                                              SelectionDAG &DAG) const {
2756   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2757   MachineFunction &MF = DAG.getMachineFunction();
2758   MachineFrameInfo &MFI = MF.getFrameInfo();
2759   MFI.setReturnAddressIsTaken(true);
2760   MVT XLenVT = Subtarget.getXLenVT();
2761   int XLenInBytes = Subtarget.getXLen() / 8;
2762 
2763   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2764     return SDValue();
2765 
2766   EVT VT = Op.getValueType();
2767   SDLoc DL(Op);
2768   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2769   if (Depth) {
2770     int Off = -XLenInBytes;
2771     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
2772     SDValue Offset = DAG.getConstant(Off, DL, VT);
2773     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
2774                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
2775                        MachinePointerInfo());
2776   }
2777 
2778   // Return the value of the return address register, marking it an implicit
2779   // live-in.
2780   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
2781   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
2782 }
2783 
2784 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
2785                                                  SelectionDAG &DAG) const {
2786   SDLoc DL(Op);
2787   SDValue Lo = Op.getOperand(0);
2788   SDValue Hi = Op.getOperand(1);
2789   SDValue Shamt = Op.getOperand(2);
2790   EVT VT = Lo.getValueType();
2791 
2792   // if Shamt-XLEN < 0: // Shamt < XLEN
2793   //   Lo = Lo << Shamt
2794   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
2795   // else:
2796   //   Lo = 0
2797   //   Hi = Lo << (Shamt-XLEN)
2798 
2799   SDValue Zero = DAG.getConstant(0, DL, VT);
2800   SDValue One = DAG.getConstant(1, DL, VT);
2801   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2802   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2803   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2804   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2805 
2806   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2807   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2808   SDValue ShiftRightLo =
2809       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
2810   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2811   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2812   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
2813 
2814   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2815 
2816   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2817   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2818 
2819   SDValue Parts[2] = {Lo, Hi};
2820   return DAG.getMergeValues(Parts, DL);
2821 }
2822 
2823 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
2824                                                   bool IsSRA) const {
2825   SDLoc DL(Op);
2826   SDValue Lo = Op.getOperand(0);
2827   SDValue Hi = Op.getOperand(1);
2828   SDValue Shamt = Op.getOperand(2);
2829   EVT VT = Lo.getValueType();
2830 
2831   // SRA expansion:
2832   //   if Shamt-XLEN < 0: // Shamt < XLEN
2833   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2834   //     Hi = Hi >>s Shamt
2835   //   else:
2836   //     Lo = Hi >>s (Shamt-XLEN);
2837   //     Hi = Hi >>s (XLEN-1)
2838   //
2839   // SRL expansion:
2840   //   if Shamt-XLEN < 0: // Shamt < XLEN
2841   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2842   //     Hi = Hi >>u Shamt
2843   //   else:
2844   //     Lo = Hi >>u (Shamt-XLEN);
2845   //     Hi = 0;
2846 
2847   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2848 
2849   SDValue Zero = DAG.getConstant(0, DL, VT);
2850   SDValue One = DAG.getConstant(1, DL, VT);
2851   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2852   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2853   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2854   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2855 
2856   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2857   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2858   SDValue ShiftLeftHi =
2859       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
2860   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2861   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2862   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
2863   SDValue HiFalse =
2864       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
2865 
2866   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2867 
2868   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2869   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2870 
2871   SDValue Parts[2] = {Lo, Hi};
2872   return DAG.getMergeValues(Parts, DL);
2873 }
2874 
2875 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
2876 // legal equivalently-sized i8 type, so we can use that as a go-between.
2877 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
2878                                                   SelectionDAG &DAG) const {
2879   SDLoc DL(Op);
2880   MVT VT = Op.getSimpleValueType();
2881   SDValue SplatVal = Op.getOperand(0);
2882   // All-zeros or all-ones splats are handled specially.
2883   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
2884     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
2885     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
2886   }
2887   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
2888     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
2889     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
2890   }
2891   MVT XLenVT = Subtarget.getXLenVT();
2892   assert(SplatVal.getValueType() == XLenVT &&
2893          "Unexpected type for i1 splat value");
2894   MVT InterVT = VT.changeVectorElementType(MVT::i8);
2895   SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
2896                          DAG.getConstant(1, DL, XLenVT));
2897   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
2898   SDValue Zero = DAG.getConstant(0, DL, InterVT);
2899   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
2900 }
2901 
2902 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
2903 // illegal (currently only vXi64 RV32).
2904 // FIXME: We could also catch non-constant sign-extended i32 values and lower
2905 // them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  // The two operands are the low and high 32-bit halves of the i64 element.
  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  // Fixed-length vectors are lowered on their scalable container type and
  // converted back to the fixed-length type afterwards.
  if (VecVT.isFixedLengthVector()) {
    MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
    // NOTE: this shadows the outer DL with an identical location.
    SDLoc DL(Op);
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

    SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
    return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
  }

  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    // (LoC >> 31) replicates Lo's sign bit across all 32 bits (0 or -1), so
    // the check is "Hi equals the sign-extension of Lo".
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);

  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
                     DAG.getRegister(RISCV::X0, MVT::i64));
}
2947 
2948 // Custom-lower extensions from mask vectors by using a vselect either with 1
2949 // for zero/any-extension or -1 for sign-extension:
2950 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
2951 // Note that any-extension is lowered identically to zero-extension.
2952 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2953                                                 int64_t ExtTrueVal) const {
2954   SDLoc DL(Op);
2955   MVT VecVT = Op.getSimpleValueType();
2956   SDValue Src = Op.getOperand(0);
2957   // Only custom-lower extensions from mask types
2958   assert(Src.getValueType().isVector() &&
2959          Src.getValueType().getVectorElementType() == MVT::i1);
2960 
2961   MVT XLenVT = Subtarget.getXLenVT();
2962   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2963   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2964 
2965   if (VecVT.isScalableVector()) {
2966     // Be careful not to introduce illegal scalar types at this stage, and be
2967     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2968     // illegal and must be expanded. Since we know that the constants are
2969     // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
2970     bool IsRV32E64 =
2971         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2972 
2973     if (!IsRV32E64) {
2974       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2975       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2976     } else {
2977       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2978       SplatTrueVal =
2979           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2980     }
2981 
2982     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2983   }
2984 
2985   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2986   MVT I1ContainerVT =
2987       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2988 
2989   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2990 
2991   SDValue Mask, VL;
2992   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2993 
2994   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2995   SplatTrueVal =
2996       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2997   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2998                                SplatTrueVal, SplatZero, VL);
2999 
3000   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
3001 }
3002 
3003 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3004     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3005   MVT ExtVT = Op.getSimpleValueType();
3006   // Only custom-lower extensions from fixed-length vector types.
3007   if (!ExtVT.isFixedLengthVector())
3008     return Op;
3009   MVT VT = Op.getOperand(0).getSimpleValueType();
3010   // Grab the canonical container type for the extended type. Infer the smaller
3011   // type from that to ensure the same number of vector elements, as we know
3012   // the LMUL will be sufficient to hold the smaller type.
3013   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
3014   // Get the extended container type manually to ensure the same number of
3015   // vector elements between source and dest.
3016   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3017                                      ContainerExtVT.getVectorElementCount());
3018 
3019   SDValue Op1 =
3020       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3021 
3022   SDLoc DL(Op);
3023   SDValue Mask, VL;
3024   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3025 
3026   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3027 
3028   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3029 }
3030 
3031 // Custom-lower truncations from vectors to mask vectors by using a mask and a
3032 // setcc operation:
3033 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  // For a scalable input, ContainerVT stays equal to VecVT.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  // Splat 1 and 0 (as XLenVT scalars) across the container type; these feed
  // the (and Src, 1) / setcc-ne-0 sequence below for both paths.
  // NOTE(review): unlike lowerVectorMaskExt, these VMV_V_X_VL nodes are built
  // without an explicit VL operand — presumably isel tolerates that here;
  // confirm against the node definition before changing.
  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  // Scalable path: plain AND + SETCC suffice.
  if (VecVT.isScalableVector()) {
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  // Fixed-length path: use the VL-predicated AND/SETCC on the container type
  // and convert the resulting mask back to the fixed-length mask type.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
3072 
3073 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
3074 // first position of a vector, and that vector is slid up to the insert index.
3075 // By limiting the active vector length to index+1 and merging with the
3076 // original vector (with an undisturbed tail policy for elements >= VL), we
3077 // achieve the desired result of leaving all elements untouched except the one
3078 // at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // The scalar can be moved into a vector directly unless it is an i64 on
  // RV32, where it lives in two GPRs.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    // Inserting at element 0 needs no slideup: vmv.s.x/vfmv.s.f merges the
    // scalar straight into Vec.
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // VL = Idx + 1 so exactly element Idx is written; elements >= VL keep their
  // old value under the tail-undisturbed policy (see comment above function).
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
3171 
3172 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
3173 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
3174 // types this is done using VMV_X_S to allow us to glean information about the
3175 // sign bits of the result.
3176 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3177                                                      SelectionDAG &DAG) const {
3178   SDLoc DL(Op);
3179   SDValue Idx = Op.getOperand(1);
3180   SDValue Vec = Op.getOperand(0);
3181   EVT EltVT = Op.getValueType();
3182   MVT VecVT = Vec.getSimpleValueType();
3183   MVT XLenVT = Subtarget.getXLenVT();
3184 
3185   if (VecVT.getVectorElementType() == MVT::i1) {
3186     // FIXME: For now we just promote to an i8 vector and extract from that,
3187     // but this is probably not optimal.
3188     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
3189     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3190     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
3191   }
3192 
3193   // If this is a fixed vector, we need to convert it to a scalable vector.
3194   MVT ContainerVT = VecVT;
3195   if (VecVT.isFixedLengthVector()) {
3196     ContainerVT = getContainerForFixedLengthVector(VecVT);
3197     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3198   }
3199 
3200   // If the index is 0, the vector is already in the right position.
3201   if (!isNullConstant(Idx)) {
3202     // Use a VL of 1 to avoid processing more elements than we need.
3203     SDValue VL = DAG.getConstant(1, DL, XLenVT);
3204     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3205     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3206     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3207                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3208   }
3209 
3210   if (!EltVT.isInteger()) {
3211     // Floating-point extracts are handled in TableGen.
3212     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
3213                        DAG.getConstant(0, DL, XLenVT));
3214   }
3215 
3216   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3217   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
3218 }
3219 
3220 // Some RVV intrinsics may claim that they want an integer operand to be
3221 // promoted or expanded.
// Legalize the scalar "splat" operand of an RVV intrinsic whose type differs
// from XLenVT: promote narrower scalars, and on RV32 split/splat i64 scalars.
// Returns SDValue() when no rewrite is needed (caller falls back to default
// lowering).
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  // With a chain, operand 0 is the chain and operand 1 the intrinsic ID.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  // Table lookup: SplatOperand == 0 means this intrinsic has no scalar
  // operand that needs this treatment.
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  // Shift the table's operand index by one when a chain operand is present.
  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
3295 
// Custom-lower the intrinsics that need special handling; everything that
// falls through the switch still gets the common scalar-splat legalization.
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in x4 (tp).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
    // Lower to the GORCI encoding for orc.b.
    return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
                       DAG.getConstant(7, DL, XLenVT));
  case Intrinsic::riscv_grev:
  case Intrinsic::riscv_gorc: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_shfl:
  case Intrinsic::riscv_unshfl: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_bcompress:
  case Intrinsic::riscv_bdecompress: {
    unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
                                                       : RISCVISD::BDECOMPRESS;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getSimpleValueType(), DL, DAG, Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    // Scalars no wider than XLEN can be inserted directly after any-extending
    // to XLenVT.
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   sw lo, (a0)
    //   sw hi, 4(a0)
    //   vlse vX, (a0)
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = Op.getOperand(3);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
    SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                      DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
                    DAG.getCondCode(ISD::SETEQ), Mask, VL);
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 6;
    unsigned OpOffset = IsMasked ? 1 : 0;
    SDValue Scalar = Op.getOperand(2 + OpOffset);
    if (Scalar.getValueType().bitsLE(XLenVT))
      break;

    // Splatting a sign extended constant is fine.
    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
      if (isInt<32>(CVal->getSExtValue()))
        break;

    MVT VT = Op.getSimpleValueType();
    assert(VT.getVectorElementType() == MVT::i64 &&
           Scalar.getValueType() == MVT::i64 && "Unexpected VTs");

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));

    SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                                   DAG.getConstant(0, DL, XLenVT));
    SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                                   DAG.getConstant(1, DL, XLenVT));

    // Double the VL since we halved SEW.
    SDValue VL = Op.getOperand(NumOps - 1);
    SDValue I32VL =
        DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));

    MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
                        I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
                        I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
                        I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
                        I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;

    // Apply mask after the operation.
    SDValue Mask = Op.getOperand(NumOps - 2);
    SDValue MaskedOff = Op.getOperand(1);
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
  }
  }

  // Anything that fell through may still need its scalar operand legalized.
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
3452 
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Chained intrinsics only need the common scalar-splat legalization; the
  // helper accounts for the extra chain operand itself.
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
3457 
3458 static MVT getLMUL1VT(MVT VT) {
3459   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3460          "Unexpected vector MVT");
3461   return MVT::getScalableVectorVT(
3462       VT.getVectorElementType(),
3463       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3464 }
3465 
3466 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
3467   switch (ISDOpcode) {
3468   default:
3469     llvm_unreachable("Unhandled reduction");
3470   case ISD::VECREDUCE_ADD:
3471     return RISCVISD::VECREDUCE_ADD_VL;
3472   case ISD::VECREDUCE_UMAX:
3473     return RISCVISD::VECREDUCE_UMAX_VL;
3474   case ISD::VECREDUCE_SMAX:
3475     return RISCVISD::VECREDUCE_SMAX_VL;
3476   case ISD::VECREDUCE_UMIN:
3477     return RISCVISD::VECREDUCE_UMIN_VL;
3478   case ISD::VECREDUCE_SMIN:
3479     return RISCVISD::VECREDUCE_SMIN_VL;
3480   case ISD::VECREDUCE_AND:
3481     return RISCVISD::VECREDUCE_AND_VL;
3482   case ISD::VECREDUCE_OR:
3483     return RISCVISD::VECREDUCE_OR_VL;
3484   case ISD::VECREDUCE_XOR:
3485     return RISCVISD::VECREDUCE_XOR_VL;
3486   }
3487 }
3488 
// Lower AND/OR/XOR reductions over i1 vectors by counting the set mask bits
// with vpopc and comparing the count:
//   AND -> vpopc(~x) == 0,  OR -> vpopc(x) != 0,  XOR -> (vpopc(x) & 1) != 0.
SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();
  assert(Op.getValueType() == XLenVT &&
         "Expected reduction output to be legalized to XLenVT");

  // Fixed-length masks are handled via their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // Mask is all-ones; VL covers the original element count, so vpopc only
  // counts the elements that belong to the source vector.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
    // vpopc ~x == 0
    // The xor with the all-ones mask computes ~x under the active VL.
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
    return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
  case ISD::VECREDUCE_OR:
    // vpopc x != 0
    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
    return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
  case ISD::VECREDUCE_XOR: {
    // ((vpopc x) & 1) != 0
    // XOR-reduction of i1s is the parity of the set-bit count.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
  }
  }
}
3534 
// Lower integer VECREDUCE_* to the corresponding RVV VL-predicated reduction
// node, splitting illegal vector types down to a legal one first. The scalar
// result is extracted from element 0 of an LMUL=1 result vector.
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    // Fold the halves together with the scalar base opcode; reducing the
    // combined halves is equivalent for these (associative/commutative) ops.
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  // Fixed-length inputs are processed inside their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // The reduction result is produced in an LMUL=1 vector; the start value is
  // supplied via another LMUL=1 vector (here a splat of the neutral element).
  MVT M1VT = getLMUL1VT(ContainerVT);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  // The reduction's scalar result was legalized to XLenVT; sign-extend or
  // truncate the extracted element accordingly.
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
3584 
3585 // Given a reduction op, this function returns the matching reduction opcode,
3586 // the vector SDValue and the scalar SDValue required to lower this to a
3587 // RISCVISD node.
3588 static std::tuple<unsigned, SDValue, SDValue>
3589 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
3590   SDLoc DL(Op);
3591   auto Flags = Op->getFlags();
3592   unsigned Opcode = Op.getOpcode();
3593   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
3594   switch (Opcode) {
3595   default:
3596     llvm_unreachable("Unhandled reduction");
3597   case ISD::VECREDUCE_FADD:
3598     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
3599                            DAG.getConstantFP(0.0, DL, EltVT));
3600   case ISD::VECREDUCE_SEQ_FADD:
3601     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
3602                            Op.getOperand(0));
3603   case ISD::VECREDUCE_FMIN:
3604     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
3605                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3606   case ISD::VECREDUCE_FMAX:
3607     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
3608                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3609   }
3610 }
3611 
// Lower FP VECREDUCE_* by splatting the scalar start value into an LMUL=1
// vector, performing the RVV reduction, and extracting element 0.
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // A reduction's result type is the vector's element type.
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  // Fixed-length inputs are processed inside their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  // The reduction result and the scalar operand both live in LMUL=1 vectors.
  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // FIXME: This is a VLMAX splat which might be too large and can prevent
  // vsetvli removal.
  SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
  // The scalar result is in element 0 of the LMUL=1 reduction result.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
3642 
// Lower INSERT_SUBVECTOR. Register-aligned inserts resolve to subregister
// operations; everything else is done with a vslideup into (at most) an
// LMUL=1 slice of the destination vector. Mask (i1) vectors are first
// re-expressed as i8 vectors, or zero-extended when that isn't possible.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  // The insertion index is a constant operand by the INSERT_SUBVECTOR
  // contract.
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      // Reinterpret both vectors (and the index) in units of i8.
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back to i1 by comparing the extended elements against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // Place the subvector at index 0 of a container-sized undef vector so it
    // can be slid up into position.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  // Scalable-into-scalable insert: decompose the index into a subregister
  // index plus a remainder within that subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // A fractional-LMUL subvector occupies only part of a register, so even at
  // RemIdx == 0 the insert must preserve that register's other elements.
  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  // AlignedIdx is the register-aligned base of the insertion point.
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  // Place the subvector at index 0 of an LMUL=1-sized undef vector.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
3792 
// Lower EXTRACT_SUBVECTOR. Register-aligned extracts resolve to subregister
// operations; everything else is done with a vslidedown of (at most) an
// LMUL=1 slice of the source vector. Mask (i1) vectors are first
// re-expressed as i8 vectors, or zero-extended when that isn't possible.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  // The extraction index is a constant operand by the EXTRACT_SUBVECTOR
  // contract.
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      // Reinterpret both vectors (and the index) in units of i8.
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Truncate back to i1 by comparing the extended elements against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  // Scalable-from-scalable extract: decompose the index into a subregister
  // index plus a remainder within that subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
3916 
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion: a shift for power-of-two steps, a
// multiply otherwise.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  // vid.v produces the identity step sequence 0, 1, 2, ...
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    assert(Op.getOperand(0).getValueType() == XLenVT &&
           "Unexpected step value type");
    if (isPowerOf2_64(StepValImm)) {
      // Power-of-two step: splat log2(step) and shift left.
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      // General step: splat the step value and multiply.
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}
3944 
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // MaxVLMAX == 0 means the maximum vector length is unknown (no upper bound
  // was specified for the RVV register size).
  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  // The gather indices are computed in an integer type with the same element
  // count (and, by default, the same element size) as the data vector.
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  unsigned MinElts = VecVT.getVectorMinNumElements();
  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                              DAG.getConstant(MinElts, DL, XLenVT));
  SDValue VLMinus1 =
      DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);

  // Indices = (VLMAX-1) - vid = VLMAX-1, VLMAX-2, ..., 1, 0.
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices =
      DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
4022 
4023 SDValue
4024 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
4025                                                      SelectionDAG &DAG) const {
4026   auto *Load = cast<LoadSDNode>(Op);
4027 
4028   SDLoc DL(Op);
4029   MVT VT = Op.getSimpleValueType();
4030   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4031 
4032   SDValue VL =
4033       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4034 
4035   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4036   SDValue NewLoad = DAG.getMemIntrinsicNode(
4037       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
4038       Load->getMemoryVT(), Load->getMemOperand());
4039 
4040   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4041   return DAG.getMergeValues({Result, Load->getChain()}, DL);
4042 }
4043 
4044 SDValue
4045 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
4046                                                       SelectionDAG &DAG) const {
4047   auto *Store = cast<StoreSDNode>(Op);
4048 
4049   SDLoc DL(Op);
4050   SDValue StoreVal = Store->getValue();
4051   MVT VT = StoreVal.getSimpleValueType();
4052 
4053   // If the size less than a byte, we need to pad with zeros to make a byte.
4054   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
4055     VT = MVT::v8i1;
4056     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
4057                            DAG.getConstant(0, DL, VT), StoreVal,
4058                            DAG.getIntPtrConstant(0, DL));
4059   }
4060 
4061   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4062 
4063   SDValue VL =
4064       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4065 
4066   SDValue NewValue =
4067       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
4068   return DAG.getMemIntrinsicNode(
4069       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
4070       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
4071       Store->getMemoryVT(), Store->getMemOperand());
4072 }
4073 
4074 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4075   auto *Load = cast<MaskedLoadSDNode>(Op);
4076 
4077   SDLoc DL(Op);
4078   MVT VT = Op.getSimpleValueType();
4079   MVT XLenVT = Subtarget.getXLenVT();
4080 
4081   SDValue Mask = Load->getMask();
4082   SDValue PassThru = Load->getPassThru();
4083   SDValue VL;
4084 
4085   MVT ContainerVT = VT;
4086   if (VT.isFixedLengthVector()) {
4087     ContainerVT = getContainerForFixedLengthVector(VT);
4088     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4089 
4090     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4091     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4092     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4093   } else
4094     VL = DAG.getRegister(RISCV::X0, XLenVT);
4095 
4096   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4097   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
4098   SDValue Ops[] = {Load->getChain(),   IntID, PassThru,
4099                    Load->getBasePtr(), Mask,  VL};
4100   SDValue Result =
4101       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
4102                               Load->getMemoryVT(), Load->getMemOperand());
4103   SDValue Chain = Result.getValue(1);
4104 
4105   if (VT.isFixedLengthVector())
4106     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4107 
4108   return DAG.getMergeValues({Result, Chain}, DL);
4109 }
4110 
4111 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
4112   auto *Store = cast<MaskedStoreSDNode>(Op);
4113 
4114   SDLoc DL(Op);
4115   SDValue Val = Store->getValue();
4116   SDValue Mask = Store->getMask();
4117   MVT VT = Val.getSimpleValueType();
4118   MVT XLenVT = Subtarget.getXLenVT();
4119   SDValue VL;
4120 
4121   MVT ContainerVT = VT;
4122   if (VT.isFixedLengthVector()) {
4123     ContainerVT = getContainerForFixedLengthVector(VT);
4124     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4125 
4126     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
4127     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4128     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4129   } else
4130     VL = DAG.getRegister(RISCV::X0, XLenVT);
4131 
4132   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
4133   return DAG.getMemIntrinsicNode(
4134       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
4135       {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
4136       Store->getMemoryVT(), Store->getMemOperand());
4137 }
4138 
4139 SDValue
4140 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
4141                                                       SelectionDAG &DAG) const {
4142   MVT InVT = Op.getOperand(0).getSimpleValueType();
4143   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
4144 
4145   MVT VT = Op.getSimpleValueType();
4146 
4147   SDValue Op1 =
4148       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4149   SDValue Op2 =
4150       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
4151 
4152   SDLoc DL(Op);
4153   SDValue VL =
4154       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4155 
4156   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4157   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4158 
4159   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
4160                             Op.getOperand(2), Mask, VL);
4161 
4162   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
4163 }
4164 
4165 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
4166     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
4167   MVT VT = Op.getSimpleValueType();
4168 
4169   if (VT.getVectorElementType() == MVT::i1)
4170     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
4171 
4172   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
4173 }
4174 
4175 // Lower vector ABS to smax(X, sub(0, X)).
4176 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
4177   SDLoc DL(Op);
4178   MVT VT = Op.getSimpleValueType();
4179   SDValue X = Op.getOperand(0);
4180 
4181   assert(VT.isFixedLengthVector() && "Unexpected type");
4182 
4183   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4184   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
4185 
4186   SDValue Mask, VL;
4187   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4188 
4189   SDValue SplatZero =
4190       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4191                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4192   SDValue NegX =
4193       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
4194   SDValue Max =
4195       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
4196 
4197   return convertFromScalableVector(VT, Max, DAG, Subtarget);
4198 }
4199 
4200 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
4201     SDValue Op, SelectionDAG &DAG) const {
4202   SDLoc DL(Op);
4203   MVT VT = Op.getSimpleValueType();
4204   SDValue Mag = Op.getOperand(0);
4205   SDValue Sign = Op.getOperand(1);
4206   assert(Mag.getValueType() == Sign.getValueType() &&
4207          "Can only handle COPYSIGN with matching types.");
4208 
4209   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4210   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
4211   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
4212 
4213   SDValue Mask, VL;
4214   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4215 
4216   SDValue CopySign =
4217       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
4218 
4219   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
4220 }
4221 
4222 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
4223     SDValue Op, SelectionDAG &DAG) const {
4224   MVT VT = Op.getSimpleValueType();
4225   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4226 
4227   MVT I1ContainerVT =
4228       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4229 
4230   SDValue CC =
4231       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
4232   SDValue Op1 =
4233       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
4234   SDValue Op2 =
4235       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
4236 
4237   SDLoc DL(Op);
4238   SDValue Mask, VL;
4239   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4240 
4241   SDValue Select =
4242       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
4243 
4244   return convertFromScalableVector(VT, Select, DAG, Subtarget);
4245 }
4246 
// Generic lowering of a fixed-length vector operation to a RISCVISD::*_VL
// node: each fixed-length vector operand is converted to the scalable
// container type, non-vector operands pass through unchanged, and then the
// mask (if HasMask) and VL operands are appended in that order, matching the
// *_VL operand convention. The result is converted back to the original
// fixed-length type.
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc,
                                               bool HasMask) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  // Append the trailing mask/VL operands expected by *_VL nodes.
  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
4280 
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
//   types.
SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
                                       unsigned RISCVISDOpc) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SmallVector<SDValue, 4> Ops;

  // Translate each operand: scalars and scalable vectors pass through
  // unchanged; fixed-length vectors are converted to their container type.
  for (const auto &OpIdx : enumerate(Op->ops())) {
    SDValue V = OpIdx.value();
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
    // Pass through operands which aren't fixed-length vectors.
    if (!V.getValueType().isFixedLengthVector()) {
      Ops.push_back(V);
      continue;
    }
    // "cast" fixed length vector to a scalable vector.
    MVT OpVT = V.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
    assert(useRVVForFixedLengthVectorVT(OpVT) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  // A scalable result type needs no conversion; emit the node directly.
  if (!VT.isFixedLengthVector())
    return DAG.getNode(RISCVISDOpc, DL, VT, Ops);

  // Fixed-length result: emit on the container type and convert back.
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);

  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
4317 
// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
// a RVV indexed load. The RVV indexed load instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
// truncated to XLEN and are treated as byte offsets. Any signed or scaled
// indexing is extended to the XLEN value type and scaled accordingly.
SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
  auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc DL(Op);

  SDValue Index = MGN->getIndex();
  SDValue Mask = MGN->getMask();
  SDValue PassThru = MGN->getPassThru();

  MVT VT = Op.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
         "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue VL;
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the result and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    if (VT.bitsGE(IndexVT)) {
      // Result type dominates: derive the index container from it.
      ContainerVT = getContainerForFixedLengthVector(VT);
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      // Index type dominates: derive the result container from it.
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    // The mask and passthru are only needed (and converted) for the masked
    // intrinsic form.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }

    // VL is the exact fixed-length element count.
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable vectors use X0 as the VL operand, i.e. the maximum VL (VLMAX
    // per the RVV vsetvli convention).
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Build the intrinsic call; the masked form additionally takes the passthru
  // (merge) operand before the pointer and the mask after the index.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
  SmallVector<SDValue, 8> Ops{MGN->getChain(),
                              DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (!IsUnmasked)
    Ops.push_back(PassThru);
  Ops.push_back(MGN->getBasePtr());
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              MGN->getMemoryVT(), MGN->getMemOperand());
  SDValue Chain = Result.getValue(1);

  // Convert the loaded value back to the original fixed-length type.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
4398 
// Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
// a RVV indexed store. The RVV indexed store instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
// truncated to XLEN and are treated as byte offsets. Any signed or scaled
// indexing is extended to the XLEN value type and scaled accordingly.
SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
                                           SelectionDAG &DAG) const {
  auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
  SDLoc DL(Op);
  SDValue Index = MSN->getIndex();
  SDValue Mask = MSN->getMask();
  SDValue Val = MSN->getValue();

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
         "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue VL;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the value and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    MVT ContainerVT;
    if (VT.bitsGE(IndexVT)) {
      // Value type dominates: derive the index container from it.
      ContainerVT = getContainerForFixedLengthVector(VT);
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      // Index type dominates: derive the value container from it.
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    // The mask is only needed (and converted) for the masked intrinsic form.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    // VL is the exact fixed-length element count.
    VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
  } else
    // Scalable vectors use X0 as the VL operand, i.e. the maximum VL (VLMAX
    // per the RVV vsetvli convention).
    VL = DAG.getRegister(RISCV::X0, XLenVT);

  // Build the intrinsic call: chain, intrinsic id, value, pointer, index,
  // optional mask, VL.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{MSN->getChain(),
                              DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(MSN->getBasePtr());
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
                                 MSN->getMemoryVT(), MSN->getMemOperand());
}
4470 
// Lower GET_ROUNDING by reading the FRM CSR and translating the RISCV
// rounding-mode encoding into the generic FLT_ROUNDS encoding via a
// compile-time lookup table.
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  // Read the current rounding mode from the FRM control/status register.
  SDValue SysRegNo = DAG.getConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
  SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
  SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);

  // Encoding used for rounding mode in RISCV differs from that used in
  // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
  // table, which consists of a sequence of 4-bit fields, each representing
  // corresponding FLT_ROUNDS mode.
  static const int Table =
      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);

  // Index into the table: shift right by (RM * 4), i.e. shift RM left by 2
  // first, then extract the selected 4-bit field (masked to 3 bits since all
  // table entries fit in 3 bits).
  SDValue Shift =
      DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                               DAG.getConstant(7, DL, XLenVT));

  return DAG.getMergeValues({Masked, Chain}, DL);
}
4501 
// Lower SET_ROUNDING by translating the generic FLT_ROUNDS encoding into the
// RISCV encoding via a compile-time lookup table (the inverse of
// lowerGET_ROUNDING's table) and writing the result to the FRM CSR.
SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  // Operand 1 is the requested rounding mode in FLT_ROUNDS encoding.
  SDValue RMValue = Op->getOperand(1);
  SDValue SysRegNo = DAG.getConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);

  // Encoding used for rounding mode in RISCV differs from that used in
  // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
  // a table, which consists of a sequence of 4-bit fields, each representing
  // corresponding RISCV mode.
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));

  // Index into the table: shift right by (mode * 4), then extract the
  // selected 4-bit field (masked to 3 bits since all entries fit in 3 bits).
  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));
  // Write the translated mode back to FRM.
  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}
4531 
4532 // Returns the opcode of the target-specific SDNode that implements the 32-bit
4533 // form of the given Opcode.
4534 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
4535   switch (Opcode) {
4536   default:
4537     llvm_unreachable("Unexpected opcode");
4538   case ISD::SHL:
4539     return RISCVISD::SLLW;
4540   case ISD::SRA:
4541     return RISCVISD::SRAW;
4542   case ISD::SRL:
4543     return RISCVISD::SRLW;
4544   case ISD::SDIV:
4545     return RISCVISD::DIVW;
4546   case ISD::UDIV:
4547     return RISCVISD::DIVUW;
4548   case ISD::UREM:
4549     return RISCVISD::REMUW;
4550   case ISD::ROTL:
4551     return RISCVISD::ROLW;
4552   case ISD::ROTR:
4553     return RISCVISD::RORW;
4554   case RISCVISD::GREV:
4555     return RISCVISD::GREVW;
4556   case RISCVISD::GORC:
4557     return RISCVISD::GORCW;
4558   }
4559 }
4560 
4561 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
4562 // Because i32 isn't a legal type for RV64, these operations would otherwise
4563 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
4564 // later one because the fact the operation was originally of type i32 is
4565 // lost.
4566 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
4567                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
4568   SDLoc DL(N);
4569   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
4570   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4571   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4572   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4573   // ReplaceNodeResults requires we maintain the same type for the return value.
4574   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4575 }
4576 
4577 // Converts the given 32-bit operation to a i64 operation with signed extension
4578 // semantic to reduce the signed extension instructions.
4579 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4580   SDLoc DL(N);
4581   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4582   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4583   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4584   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4585                                DAG.getValueType(MVT::i32));
4586   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4587 }
4588 
4589 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
4590                                              SmallVectorImpl<SDValue> &Results,
4591                                              SelectionDAG &DAG) const {
4592   SDLoc DL(N);
4593   switch (N->getOpcode()) {
4594   default:
4595     llvm_unreachable("Don't know how to custom type legalize this operation!");
4596   case ISD::STRICT_FP_TO_SINT:
4597   case ISD::STRICT_FP_TO_UINT:
4598   case ISD::FP_TO_SINT:
4599   case ISD::FP_TO_UINT: {
4600     bool IsStrict = N->isStrictFPOpcode();
4601     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4602            "Unexpected custom legalisation");
4603     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
4604     // If the FP type needs to be softened, emit a library call using the 'si'
4605     // version. If we left it to default legalization we'd end up with 'di'. If
4606     // the FP type doesn't need to be softened just let generic type
4607     // legalization promote the result type.
4608     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
4609         TargetLowering::TypeSoftenFloat)
4610       return;
4611     RTLIB::Libcall LC;
4612     if (N->getOpcode() == ISD::FP_TO_SINT ||
4613         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
4614       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
4615     else
4616       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
4617     MakeLibCallOptions CallOptions;
4618     EVT OpVT = Op0.getValueType();
4619     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
4620     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4621     SDValue Result;
4622     std::tie(Result, Chain) =
4623         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
4624     Results.push_back(Result);
4625     if (IsStrict)
4626       Results.push_back(Chain);
4627     break;
4628   }
4629   case ISD::READCYCLECOUNTER: {
4630     assert(!Subtarget.is64Bit() &&
4631            "READCYCLECOUNTER only has custom type legalization on riscv32");
4632 
4633     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4634     SDValue RCW =
4635         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
4636 
4637     Results.push_back(
4638         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
4639     Results.push_back(RCW.getValue(2));
4640     break;
4641   }
4642   case ISD::MUL: {
4643     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
4644     unsigned XLen = Subtarget.getXLen();
4645     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
4646     if (Size > XLen) {
4647       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
4648       SDValue LHS = N->getOperand(0);
4649       SDValue RHS = N->getOperand(1);
4650       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
4651 
4652       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
4653       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
4654       // We need exactly one side to be unsigned.
4655       if (LHSIsU == RHSIsU)
4656         return;
4657 
4658       auto MakeMULPair = [&](SDValue S, SDValue U) {
4659         MVT XLenVT = Subtarget.getXLenVT();
4660         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
4661         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
4662         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
4663         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
4664         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
4665       };
4666 
4667       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
4668       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
4669 
4670       // The other operand should be signed, but still prefer MULH when
4671       // possible.
4672       if (RHSIsU && LHSIsS && !RHSIsS)
4673         Results.push_back(MakeMULPair(LHS, RHS));
4674       else if (LHSIsU && RHSIsS && !LHSIsS)
4675         Results.push_back(MakeMULPair(RHS, LHS));
4676 
4677       return;
4678     }
4679     LLVM_FALLTHROUGH;
4680   }
4681   case ISD::ADD:
4682   case ISD::SUB:
4683     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4684            "Unexpected custom legalisation");
4685     if (N->getOperand(1).getOpcode() == ISD::Constant)
4686       return;
4687     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4688     break;
4689   case ISD::SHL:
4690   case ISD::SRA:
4691   case ISD::SRL:
4692     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4693            "Unexpected custom legalisation");
4694     if (N->getOperand(1).getOpcode() == ISD::Constant)
4695       return;
4696     Results.push_back(customLegalizeToWOp(N, DAG));
4697     break;
4698   case ISD::ROTL:
4699   case ISD::ROTR:
4700     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4701            "Unexpected custom legalisation");
4702     Results.push_back(customLegalizeToWOp(N, DAG));
4703     break;
4704   case ISD::CTTZ:
4705   case ISD::CTTZ_ZERO_UNDEF:
4706   case ISD::CTLZ:
4707   case ISD::CTLZ_ZERO_UNDEF: {
4708     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4709            "Unexpected custom legalisation");
4710 
4711     SDValue NewOp0 =
4712         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4713     bool IsCTZ =
4714         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
4715     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
4716     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
4717     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4718     return;
4719   }
4720   case ISD::SDIV:
4721   case ISD::UDIV:
4722   case ISD::UREM: {
4723     MVT VT = N->getSimpleValueType(0);
4724     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
4725            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
4726            "Unexpected custom legalisation");
4727     if (N->getOperand(0).getOpcode() == ISD::Constant ||
4728         N->getOperand(1).getOpcode() == ISD::Constant)
4729       return;
4730 
4731     // If the input is i32, use ANY_EXTEND since the W instructions don't read
4732     // the upper 32 bits. For other types we need to sign or zero extend
4733     // based on the opcode.
4734     unsigned ExtOpc = ISD::ANY_EXTEND;
4735     if (VT != MVT::i32)
4736       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
4737                                            : ISD::ZERO_EXTEND;
4738 
4739     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
4740     break;
4741   }
4742   case ISD::UADDO:
4743   case ISD::USUBO: {
4744     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4745            "Unexpected custom legalisation");
4746     bool IsAdd = N->getOpcode() == ISD::UADDO;
4747     // Create an ADDW or SUBW.
4748     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4749     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4750     SDValue Res =
4751         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
4752     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
4753                       DAG.getValueType(MVT::i32));
4754 
4755     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
4756     // Since the inputs are sign extended from i32, this is equivalent to
4757     // comparing the lower 32 bits.
4758     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
4759     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
4760                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
4761 
4762     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4763     Results.push_back(Overflow);
4764     return;
4765   }
4766   case ISD::UADDSAT:
4767   case ISD::USUBSAT: {
4768     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4769            "Unexpected custom legalisation");
4770     if (Subtarget.hasStdExtZbb()) {
4771       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
4772       // sign extend allows overflow of the lower 32 bits to be detected on
4773       // the promoted size.
4774       SDValue LHS =
4775           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
4776       SDValue RHS =
4777           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
4778       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
4779       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4780       return;
4781     }
4782 
4783     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
4784     // promotion for UADDO/USUBO.
4785     Results.push_back(expandAddSubSat(N, DAG));
4786     return;
4787   }
4788   case ISD::BITCAST: {
4789     EVT VT = N->getValueType(0);
4790     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
4791     SDValue Op0 = N->getOperand(0);
4792     EVT Op0VT = Op0.getValueType();
4793     MVT XLenVT = Subtarget.getXLenVT();
4794     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
4795       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
4796       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
4797     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
4798                Subtarget.hasStdExtF()) {
4799       SDValue FPConv =
4800           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
4801       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
4802     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
4803                isTypeLegal(Op0VT)) {
4804       // Custom-legalize bitcasts from fixed-length vector types to illegal
4805       // scalar types in order to improve codegen. Bitcast the vector to a
4806       // one-element vector type whose element type is the same as the result
4807       // type, and extract the first element.
4808       LLVMContext &Context = *DAG.getContext();
4809       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
4810       Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4811                                     DAG.getConstant(0, DL, XLenVT)));
4812     }
4813     break;
4814   }
4815   case RISCVISD::GREV:
4816   case RISCVISD::GORC: {
4817     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4818            "Unexpected custom legalisation");
4819     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
4820     // This is similar to customLegalizeToWOp, except that we pass the second
4821     // operand (a TargetConstant) straight through: it is already of type
4822     // XLenVT.
4823     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
4824     SDValue NewOp0 =
4825         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4826     SDValue NewOp1 =
4827         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4828     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4829     // ReplaceNodeResults requires we maintain the same type for the return
4830     // value.
4831     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4832     break;
4833   }
4834   case RISCVISD::SHFL: {
4835     // There is no SHFLIW instruction, but we can just promote the operation.
4836     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4837            "Unexpected custom legalisation");
4838     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
4839     SDValue NewOp0 =
4840         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4841     SDValue NewOp1 =
4842         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4843     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
4844     // ReplaceNodeResults requires we maintain the same type for the return
4845     // value.
4846     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4847     break;
4848   }
4849   case ISD::BSWAP:
4850   case ISD::BITREVERSE: {
4851     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4852            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
4853     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4854                                  N->getOperand(0));
4855     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
4856     SDValue GREVIW = DAG.getNode(RISCVISD::GREVW, DL, MVT::i64, NewOp0,
4857                                  DAG.getConstant(Imm, DL, MVT::i64));
4858     // ReplaceNodeResults requires we maintain the same type for the return
4859     // value.
4860     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
4861     break;
4862   }
4863   case ISD::FSHL:
4864   case ISD::FSHR: {
4865     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4866            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
4867     SDValue NewOp0 =
4868         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4869     SDValue NewOp1 =
4870         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4871     SDValue NewOp2 =
4872         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
4873     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
4874     // Mask the shift amount to 5 bits.
4875     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
4876                          DAG.getConstant(0x1f, DL, MVT::i64));
4877     unsigned Opc =
4878         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
4879     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
4880     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
4881     break;
4882   }
4883   case ISD::EXTRACT_VECTOR_ELT: {
4884     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
4885     // type is illegal (currently only vXi64 RV32).
4886     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
4887     // transferred to the destination register. We issue two of these from the
4888     // upper- and lower- halves of the SEW-bit vector element, slid down to the
4889     // first element.
4890     SDValue Vec = N->getOperand(0);
4891     SDValue Idx = N->getOperand(1);
4892 
4893     // The vector type hasn't been legalized yet so we can't issue target
4894     // specific nodes if it needs legalization.
4895     // FIXME: We would manually legalize if it's important.
4896     if (!isTypeLegal(Vec.getValueType()))
4897       return;
4898 
4899     MVT VecVT = Vec.getSimpleValueType();
4900 
4901     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
4902            VecVT.getVectorElementType() == MVT::i64 &&
4903            "Unexpected EXTRACT_VECTOR_ELT legalization");
4904 
4905     // If this is a fixed vector, we need to convert it to a scalable vector.
4906     MVT ContainerVT = VecVT;
4907     if (VecVT.isFixedLengthVector()) {
4908       ContainerVT = getContainerForFixedLengthVector(VecVT);
4909       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4910     }
4911 
4912     MVT XLenVT = Subtarget.getXLenVT();
4913 
4914     // Use a VL of 1 to avoid processing more elements than we need.
4915     MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
4916     SDValue VL = DAG.getConstant(1, DL, XLenVT);
4917     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4918 
4919     // Unless the index is known to be 0, we must slide the vector down to get
4920     // the desired element into index 0.
4921     if (!isNullConstant(Idx)) {
4922       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4923                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
4924     }
4925 
4926     // Extract the lower XLEN bits of the correct vector element.
4927     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4928 
4929     // To extract the upper XLEN bits of the vector element, shift the first
4930     // element right by 32 bits and re-extract the lower XLEN bits.
4931     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4932                                      DAG.getConstant(32, DL, XLenVT), VL);
4933     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
4934                                  ThirtyTwoV, Mask, VL);
4935 
4936     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
4937 
4938     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
4939     break;
4940   }
4941   case ISD::INTRINSIC_WO_CHAIN: {
4942     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4943     switch (IntNo) {
4944     default:
4945       llvm_unreachable(
4946           "Don't know how to custom type legalize this intrinsic!");
4947     case Intrinsic::riscv_orc_b: {
4948       // Lower to the GORCI encoding for orc.b with the operand extended.
4949       SDValue NewOp =
4950           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4951       // If Zbp is enabled, use GORCIW which will sign extend the result.
4952       unsigned Opc =
4953           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
4954       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
4955                                 DAG.getConstant(7, DL, MVT::i64));
4956       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4957       return;
4958     }
4959     case Intrinsic::riscv_grev:
4960     case Intrinsic::riscv_gorc: {
4961       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4962              "Unexpected custom legalisation");
4963       SDValue NewOp1 =
4964           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4965       SDValue NewOp2 =
4966           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
4967       unsigned Opc =
4968           IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
4969       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
4970       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4971       break;
4972     }
4973     case Intrinsic::riscv_shfl:
4974     case Intrinsic::riscv_unshfl: {
4975       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4976              "Unexpected custom legalisation");
4977       SDValue NewOp1 =
4978           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4979       SDValue NewOp2 =
4980           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
4981       unsigned Opc =
4982           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
4983       if (isa<ConstantSDNode>(N->getOperand(2))) {
4984         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
4985                              DAG.getConstant(0xf, DL, MVT::i64));
4986         Opc =
4987             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4988       }
4989       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
4990       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4991       break;
4992     }
4993     case Intrinsic::riscv_bcompress:
4994     case Intrinsic::riscv_bdecompress: {
4995       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4996              "Unexpected custom legalisation");
4997       SDValue NewOp1 =
4998           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4999       SDValue NewOp2 =
5000           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5001       unsigned Opc = IntNo == Intrinsic::riscv_bcompress
5002                          ? RISCVISD::BCOMPRESSW
5003                          : RISCVISD::BDECOMPRESSW;
5004       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5005       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5006       break;
5007     }
5008     case Intrinsic::riscv_vmv_x_s: {
5009       EVT VT = N->getValueType(0);
5010       MVT XLenVT = Subtarget.getXLenVT();
5011       if (VT.bitsLT(XLenVT)) {
5012         // Simple case just extract using vmv.x.s and truncate.
5013         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
5014                                       Subtarget.getXLenVT(), N->getOperand(1));
5015         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
5016         return;
5017       }
5018 
5019       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
5020              "Unexpected custom legalization");
5021 
5022       // We need to do the move in two steps.
5023       SDValue Vec = N->getOperand(1);
5024       MVT VecVT = Vec.getSimpleValueType();
5025 
5026       // First extract the lower XLEN bits of the element.
5027       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
5028 
5029       // To extract the upper XLEN bits of the vector element, shift the first
5030       // element right by 32 bits and re-extract the lower XLEN bits.
5031       SDValue VL = DAG.getConstant(1, DL, XLenVT);
5032       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
5033       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5034       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
5035                                        DAG.getConstant(32, DL, XLenVT), VL);
5036       SDValue LShr32 =
5037           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
5038       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
5039 
5040       Results.push_back(
5041           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
5042       break;
5043     }
5044     }
5045     break;
5046   }
5047   case ISD::VECREDUCE_ADD:
5048   case ISD::VECREDUCE_AND:
5049   case ISD::VECREDUCE_OR:
5050   case ISD::VECREDUCE_XOR:
5051   case ISD::VECREDUCE_SMAX:
5052   case ISD::VECREDUCE_UMAX:
5053   case ISD::VECREDUCE_SMIN:
5054   case ISD::VECREDUCE_UMIN:
5055     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
5056       Results.push_back(V);
5057     break;
5058   case ISD::FLT_ROUNDS_: {
5059     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
5060     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
5061     Results.push_back(Res.getValue(0));
5062     Results.push_back(Res.getValue(1));
5063     break;
5064   }
5065   }
5066 }
5067 
5068 // A structure to hold one of the bit-manipulation patterns below. Together, a
5069 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
5070 //   (or (and (shl x, 1), 0xAAAAAAAA),
5071 //       (and (srl x, 1), 0x55555555))
5072 struct RISCVBitmanipPat {
5073   SDValue Op;
5074   unsigned ShAmt;
5075   bool IsSHL;
5076 
5077   bool formsPairWith(const RISCVBitmanipPat &Other) const {
5078     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
5079   }
5080 };
5081 
5082 // Matches patterns of the form
5083 //   (and (shl x, C2), (C1 << C2))
5084 //   (and (srl x, C2), C1)
5085 //   (shl (and x, C1), C2)
5086 //   (srl (and x, (C1 << C2)), C2)
5087 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
5088 // The expected masks for each shift amount are specified in BitmanipMasks where
5089 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
5090 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
5091 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
5092 // XLen is 64.
5093 static Optional<RISCVBitmanipPat>
5094 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
5095   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
5096          "Unexpected number of masks");
5097   Optional<uint64_t> Mask;
5098   // Optionally consume a mask around the shift operation.
5099   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
5100     Mask = Op.getConstantOperandVal(1);
5101     Op = Op.getOperand(0);
5102   }
5103   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
5104     return None;
5105   bool IsSHL = Op.getOpcode() == ISD::SHL;
5106 
5107   if (!isa<ConstantSDNode>(Op.getOperand(1)))
5108     return None;
5109   uint64_t ShAmt = Op.getConstantOperandVal(1);
5110 
5111   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
5112   if (ShAmt >= Width && !isPowerOf2_64(ShAmt))
5113     return None;
5114   // If we don't have enough masks for 64 bit, then we must be trying to
5115   // match SHFL so we're only allowed to shift 1/4 of the width.
5116   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
5117     return None;
5118 
5119   SDValue Src = Op.getOperand(0);
5120 
5121   // The expected mask is shifted left when the AND is found around SHL
5122   // patterns.
5123   //   ((x >> 1) & 0x55555555)
5124   //   ((x << 1) & 0xAAAAAAAA)
5125   bool SHLExpMask = IsSHL;
5126 
5127   if (!Mask) {
5128     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
5129     // the mask is all ones: consume that now.
5130     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
5131       Mask = Src.getConstantOperandVal(1);
5132       Src = Src.getOperand(0);
5133       // The expected mask is now in fact shifted left for SRL, so reverse the
5134       // decision.
5135       //   ((x & 0xAAAAAAAA) >> 1)
5136       //   ((x & 0x55555555) << 1)
5137       SHLExpMask = !SHLExpMask;
5138     } else {
5139       // Use a default shifted mask of all-ones if there's no AND, truncated
5140       // down to the expected width. This simplifies the logic later on.
5141       Mask = maskTrailingOnes<uint64_t>(Width);
5142       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
5143     }
5144   }
5145 
5146   unsigned MaskIdx = Log2_32(ShAmt);
5147   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
5148 
5149   if (SHLExpMask)
5150     ExpMask <<= ShAmt;
5151 
5152   if (Mask != ExpMask)
5153     return None;
5154 
5155   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
5156 }
5157 
5158 // Matches any of the following bit-manipulation patterns:
5159 //   (and (shl x, 1), (0x55555555 << 1))
5160 //   (and (srl x, 1), 0x55555555)
5161 //   (shl (and x, 0x55555555), 1)
5162 //   (srl (and x, (0x55555555 << 1)), 1)
5163 // where the shift amount and mask may vary thus:
5164 //   [1]  = 0x55555555 / 0xAAAAAAAA
5165 //   [2]  = 0x33333333 / 0xCCCCCCCC
5166 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
5167 //   [8]  = 0x00FF00FF / 0xFF00FF00
5168 //   [16] = 0x0000FFFF / 0xFFFFFFFF
5169 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
5170 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
5171   // These are the unshifted masks which we use to match bit-manipulation
5172   // patterns. They may be shifted left in certain circumstances.
5173   static const uint64_t BitmanipMasks[] = {
5174       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
5175       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
5176 
5177   return matchRISCVBitmanipPat(Op, BitmanipMasks);
5178 }
5179 
5180 // Match the following pattern as a GREVI(W) operation
5181 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
5182 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
5183                                const RISCVSubtarget &Subtarget) {
5184   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
5185   EVT VT = Op.getValueType();
5186 
5187   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
5188     auto LHS = matchGREVIPat(Op.getOperand(0));
5189     auto RHS = matchGREVIPat(Op.getOperand(1));
5190     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
5191       SDLoc DL(Op);
5192       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
5193                          DAG.getConstant(LHS->ShAmt, DL, VT));
5194     }
5195   }
5196   return SDValue();
5197 }
5198 
// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4.  (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Handles rules 1, 2 and 4: returns a GORC node if Reverse is a GREV of X
    // by a power-of-2 amount, or a rotate of X by half the bitwidth.
    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1)) &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(RISCVISD::GORC, DL, VT, X,
                             DAG.getConstant(RotAmt, DL, VT));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // Rule 3 requires an inner OR as one operand of this OR.
    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchGREVIPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchGREVIPat(OrOp0);
    }
    auto RHS = matchGREVIPat(Op1);
    // The two shift halves must pair up and the remaining inner operand must
    // be the common source x itself.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
                         DAG.getConstant(LHS->ShAmt, DL, VT));
    }
  }
  return SDValue();
}
5264 
5265 // Matches any of the following bit-manipulation patterns:
5266 //   (and (shl x, 1), (0x22222222 << 1))
5267 //   (and (srl x, 1), 0x22222222)
5268 //   (shl (and x, 0x22222222), 1)
5269 //   (srl (and x, (0x22222222 << 1)), 1)
5270 // where the shift amount and mask may vary thus:
5271 //   [1]  = 0x22222222 / 0x44444444
5272 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
5273 //   [4]  = 0x00F000F0 / 0x0F000F00
5274 //   [8]  = 0x0000FF00 / 0x00FF0000
5275 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
5276 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
5277   // These are the unshifted masks which we use to match bit-manipulation
5278   // patterns. They may be shifted left in certain circumstances.
5279   static const uint64_t BitmanipMasks[] = {
5280       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
5281       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
5282 
5283   return matchRISCVBitmanipPat(Op, BitmanipMasks);
5284 }
5285 
// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
  EVT VT = Op.getValueType();

  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
    return SDValue();

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);

  // Or is commutable so canonicalize the second OR to the LHS.
  if (Op0.getOpcode() != ISD::OR)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() != ISD::OR)
    return SDValue();

  // We found an inner OR, so our operands are the operands of the inner OR
  // and the other operand of the outer OR.
  SDValue A = Op0.getOperand(0);
  SDValue B = Op0.getOperand(1);
  SDValue C = Op1;

  auto Match1 = matchSHFLPat(A);
  auto Match2 = matchSHFLPat(B);

  // If neither matched, we failed.
  if (!Match1 && !Match2)
    return SDValue();

  // We had at least one match. if one failed, try the remaining C operand.
  // The operands of the inner and outer OR may be in any order, so swap the
  // unmatched one with C and retry before giving up.
  if (!Match1) {
    std::swap(A, C);
    Match1 = matchSHFLPat(A);
    if (!Match1)
      return SDValue();
  } else if (!Match2) {
    std::swap(B, C);
    Match2 = matchSHFLPat(B);
    if (!Match2)
      return SDValue();
  }
  assert(Match1 && Match2);

  // Make sure our matches pair up.
  if (!Match1->formsPairWith(*Match2))
    return SDValue();

  // All that remains is to make sure C is an AND with the same input, that
  // masks out the bits that are being shuffled.
  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
      C.getOperand(0) != Match1->Op)
    return SDValue();

  uint64_t Mask = C.getConstantOperandVal(1);

  // Expected "kept" masks, indexed by log2 of the shift amount: the complement
  // of the union of the two shifted SHFL masks for that amount.
  static const uint64_t BitmanipMasks[] = {
      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
  };

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  unsigned MaskIdx = Log2_32(Match1->ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (Mask != ExpMask)
    return SDValue();

  SDLoc DL(Op);
  return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
                     DAG.getConstant(Match1->ShAmt, DL, VT));
}
5359 
5360 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
5361 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
5362 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
5363 // not undo itself, but they are redundant.
5364 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
5365   SDValue Src = N->getOperand(0);
5366 
5367   if (Src.getOpcode() != N->getOpcode())
5368     return SDValue();
5369 
5370   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
5371       !isa<ConstantSDNode>(Src.getOperand(1)))
5372     return SDValue();
5373 
5374   unsigned ShAmt1 = N->getConstantOperandVal(1);
5375   unsigned ShAmt2 = Src.getConstantOperandVal(1);
5376   Src = Src.getOperand(0);
5377 
5378   unsigned CombinedShAmt;
5379   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
5380     CombinedShAmt = ShAmt1 | ShAmt2;
5381   else
5382     CombinedShAmt = ShAmt1 ^ ShAmt2;
5383 
5384   if (CombinedShAmt == 0)
5385     return Src;
5386 
5387   SDLoc DL(N);
5388   return DAG.getNode(
5389       N->getOpcode(), DL, N->getValueType(0), Src,
5390       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
5391 }
5392 
5393 // Combine a constant select operand into its use:
5394 //
5395 // (and (select_cc lhs, rhs, cc, -1, c), x)
5396 //   -> (select_cc lhs, rhs, cc, x, (and, x, c))  [AllOnes=1]
5397 // (or  (select_cc lhs, rhs, cc, 0, c), x)
5398 //   -> (select_cc lhs, rhs, cc, x, (or, x, c))  [AllOnes=0]
5399 // (xor (select_cc lhs, rhs, cc, 0, c), x)
5400 //   -> (select_cc lhs, rhs, cc, x, (xor, x, c))  [AllOnes=0]
5401 static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
5402                                      SelectionDAG &DAG, bool AllOnes) {
5403   EVT VT = N->getValueType(0);
5404 
5405   if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
5406     return SDValue();
5407 
5408   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
5409     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
5410   };
5411 
5412   bool SwapSelectOps;
5413   SDValue TrueVal = Slct.getOperand(3);
5414   SDValue FalseVal = Slct.getOperand(4);
5415   SDValue NonConstantVal;
5416   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
5417     SwapSelectOps = false;
5418     NonConstantVal = FalseVal;
5419   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
5420     SwapSelectOps = true;
5421     NonConstantVal = TrueVal;
5422   } else
5423     return SDValue();
5424 
5425   // Slct is now know to be the desired identity constant when CC is true.
5426   TrueVal = OtherOp;
5427   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
5428   // Unless SwapSelectOps says CC should be false.
5429   if (SwapSelectOps)
5430     std::swap(TrueVal, FalseVal);
5431 
5432   return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
5433                      {Slct.getOperand(0), Slct.getOperand(1),
5434                       Slct.getOperand(2), TrueVal, FalseVal});
5435 }
5436 
5437 // Attempt combineSelectAndUse on each operand of a commutative operator N.
5438 static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
5439                                                 bool AllOnes) {
5440   SDValue N0 = N->getOperand(0);
5441   SDValue N1 = N->getOperand(1);
5442   if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
5443     return Result;
5444   if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
5445     return Result;
5446   return SDValue();
5447 }
5448 
5449 static SDValue performANDCombine(SDNode *N,
5450                                  TargetLowering::DAGCombinerInfo &DCI,
5451                                  const RISCVSubtarget &Subtarget) {
5452   SelectionDAG &DAG = DCI.DAG;
5453 
5454   // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
5455   //      (select lhs, rhs, cc, x, (and x, y))
5456   return combineSelectCCAndUseCommutative(N, DAG, true);
5457 }
5458 
5459 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
5460                                 const RISCVSubtarget &Subtarget) {
5461   SelectionDAG &DAG = DCI.DAG;
5462   if (Subtarget.hasStdExtZbp()) {
5463     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
5464       return GREV;
5465     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
5466       return GORC;
5467     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
5468       return SHFL;
5469   }
5470 
5471   // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
5472   //      (select lhs, rhs, cc, x, (or x, y))
5473   return combineSelectCCAndUseCommutative(N, DAG, false);
5474 }
5475 
5476 static SDValue performXORCombine(SDNode *N,
5477                                  TargetLowering::DAGCombinerInfo &DCI,
5478                                  const RISCVSubtarget &Subtarget) {
5479   SelectionDAG &DAG = DCI.DAG;
5480 
5481   // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
5482   //      (select lhs, rhs, cc, x, (xor x, y))
5483   return combineSelectCCAndUseCommutative(N, DAG, false);
5484 }
5485 
5486 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
5487                                                DAGCombinerInfo &DCI) const {
5488   SelectionDAG &DAG = DCI.DAG;
5489 
5490   switch (N->getOpcode()) {
5491   default:
5492     break;
5493   case RISCVISD::SplitF64: {
5494     SDValue Op0 = N->getOperand(0);
5495     // If the input to SplitF64 is just BuildPairF64 then the operation is
5496     // redundant. Instead, use BuildPairF64's operands directly.
5497     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
5498       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
5499 
5500     SDLoc DL(N);
5501 
5502     // It's cheaper to materialise two 32-bit integers than to load a double
5503     // from the constant pool and transfer it to integer registers through the
5504     // stack.
5505     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
5506       APInt V = C->getValueAPF().bitcastToAPInt();
5507       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
5508       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
5509       return DCI.CombineTo(N, Lo, Hi);
5510     }
5511 
5512     // This is a target-specific version of a DAGCombine performed in
5513     // DAGCombiner::visitBITCAST. It performs the equivalent of:
5514     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
5515     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
5516     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
5517         !Op0.getNode()->hasOneUse())
5518       break;
5519     SDValue NewSplitF64 =
5520         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
5521                     Op0.getOperand(0));
5522     SDValue Lo = NewSplitF64.getValue(0);
5523     SDValue Hi = NewSplitF64.getValue(1);
5524     APInt SignBit = APInt::getSignMask(32);
5525     if (Op0.getOpcode() == ISD::FNEG) {
5526       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
5527                                   DAG.getConstant(SignBit, DL, MVT::i32));
5528       return DCI.CombineTo(N, Lo, NewHi);
5529     }
5530     assert(Op0.getOpcode() == ISD::FABS);
5531     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
5532                                 DAG.getConstant(~SignBit, DL, MVT::i32));
5533     return DCI.CombineTo(N, Lo, NewHi);
5534   }
5535   case RISCVISD::SLLW:
5536   case RISCVISD::SRAW:
5537   case RISCVISD::SRLW:
5538   case RISCVISD::ROLW:
5539   case RISCVISD::RORW: {
5540     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
5541     SDValue LHS = N->getOperand(0);
5542     SDValue RHS = N->getOperand(1);
5543     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
5544     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
5545     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
5546         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
5547       if (N->getOpcode() != ISD::DELETED_NODE)
5548         DCI.AddToWorklist(N);
5549       return SDValue(N, 0);
5550     }
5551     break;
5552   }
5553   case RISCVISD::CLZW:
5554   case RISCVISD::CTZW: {
5555     // Only the lower 32 bits of the first operand are read
5556     SDValue Op0 = N->getOperand(0);
5557     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
5558     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
5559       if (N->getOpcode() != ISD::DELETED_NODE)
5560         DCI.AddToWorklist(N);
5561       return SDValue(N, 0);
5562     }
5563     break;
5564   }
5565   case RISCVISD::FSL:
5566   case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
5568     SDValue ShAmt = N->getOperand(2);
5569     unsigned BitWidth = ShAmt.getValueSizeInBits();
5570     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
5571     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
5572     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
5573       if (N->getOpcode() != ISD::DELETED_NODE)
5574         DCI.AddToWorklist(N);
5575       return SDValue(N, 0);
5576     }
5577     break;
5578   }
5579   case RISCVISD::FSLW:
5580   case RISCVISD::FSRW: {
5581     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
5582     // read.
5583     SDValue Op0 = N->getOperand(0);
5584     SDValue Op1 = N->getOperand(1);
5585     SDValue ShAmt = N->getOperand(2);
5586     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
5587     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
5588     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
5589         SimplifyDemandedBits(Op1, OpMask, DCI) ||
5590         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
5591       if (N->getOpcode() != ISD::DELETED_NODE)
5592         DCI.AddToWorklist(N);
5593       return SDValue(N, 0);
5594     }
5595     break;
5596   }
5597   case RISCVISD::GREV:
5598   case RISCVISD::GORC: {
5599     // Only the lower log2(Bitwidth) bits of the the shift amount are read.
5600     SDValue ShAmt = N->getOperand(1);
5601     unsigned BitWidth = ShAmt.getValueSizeInBits();
5602     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
5603     APInt ShAmtMask(BitWidth, BitWidth - 1);
5604     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
5605       if (N->getOpcode() != ISD::DELETED_NODE)
5606         DCI.AddToWorklist(N);
5607       return SDValue(N, 0);
5608     }
5609 
5610     return combineGREVI_GORCI(N, DCI.DAG);
5611   }
5612   case RISCVISD::GREVW:
5613   case RISCVISD::GORCW: {
5614     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
5615     SDValue LHS = N->getOperand(0);
5616     SDValue RHS = N->getOperand(1);
5617     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
5618     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
5619     if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
5620         SimplifyDemandedBits(RHS, RHSMask, DCI)) {
5621       if (N->getOpcode() != ISD::DELETED_NODE)
5622         DCI.AddToWorklist(N);
5623       return SDValue(N, 0);
5624     }
5625 
5626     return combineGREVI_GORCI(N, DCI.DAG);
5627   }
5628   case RISCVISD::SHFL:
5629   case RISCVISD::UNSHFL: {
5630     // Only the lower log2(Bitwidth) bits of the the shift amount are read.
5631     SDValue ShAmt = N->getOperand(1);
5632     unsigned BitWidth = ShAmt.getValueSizeInBits();
5633     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
5634     APInt ShAmtMask(BitWidth, (BitWidth / 2) - 1);
5635     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
5636       if (N->getOpcode() != ISD::DELETED_NODE)
5637         DCI.AddToWorklist(N);
5638       return SDValue(N, 0);
5639     }
5640 
5641     break;
5642   }
5643   case RISCVISD::SHFLW:
5644   case RISCVISD::UNSHFLW: {
5645     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
5646     SDValue LHS = N->getOperand(0);
5647     SDValue RHS = N->getOperand(1);
5648     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
5649     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
5650     if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
5651         SimplifyDemandedBits(RHS, RHSMask, DCI)) {
5652       if (N->getOpcode() != ISD::DELETED_NODE)
5653         DCI.AddToWorklist(N);
5654       return SDValue(N, 0);
5655     }
5656 
5657     break;
5658   }
5659   case RISCVISD::BCOMPRESSW:
5660   case RISCVISD::BDECOMPRESSW: {
5661     // Only the lower 32 bits of LHS and RHS are read.
5662     SDValue LHS = N->getOperand(0);
5663     SDValue RHS = N->getOperand(1);
5664     APInt Mask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
5665     if (SimplifyDemandedBits(LHS, Mask, DCI) ||
5666         SimplifyDemandedBits(RHS, Mask, DCI)) {
5667       if (N->getOpcode() != ISD::DELETED_NODE)
5668         DCI.AddToWorklist(N);
5669       return SDValue(N, 0);
5670     }
5671 
5672     break;
5673   }
5674   case RISCVISD::FMV_X_ANYEXTW_RV64: {
5675     SDLoc DL(N);
5676     SDValue Op0 = N->getOperand(0);
5677     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
5678     // conversion is unnecessary and can be replaced with an ANY_EXTEND
5679     // of the FMV_W_X_RV64 operand.
5680     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
5681       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
5682              "Unexpected value type!");
5683       return Op0.getOperand(0);
5684     }
5685 
5686     // This is a target-specific version of a DAGCombine performed in
5687     // DAGCombiner::visitBITCAST. It performs the equivalent of:
5688     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
5689     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
5690     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
5691         !Op0.getNode()->hasOneUse())
5692       break;
5693     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
5694                                  Op0.getOperand(0));
5695     APInt SignBit = APInt::getSignMask(32).sext(64);
5696     if (Op0.getOpcode() == ISD::FNEG)
5697       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
5698                          DAG.getConstant(SignBit, DL, MVT::i64));
5699 
5700     assert(Op0.getOpcode() == ISD::FABS);
5701     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
5702                        DAG.getConstant(~SignBit, DL, MVT::i64));
5703   }
5704   case ISD::AND:
5705     return performANDCombine(N, DCI, Subtarget);
5706   case ISD::OR:
5707     return performORCombine(N, DCI, Subtarget);
5708   case ISD::XOR:
5709     return performXORCombine(N, DCI, Subtarget);
5710   case RISCVISD::SELECT_CC: {
5711     // Transform
5712     SDValue LHS = N->getOperand(0);
5713     SDValue RHS = N->getOperand(1);
5714     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
5715     if (!ISD::isIntEqualitySetCC(CCVal))
5716       break;
5717 
5718     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
5719     //      (select_cc X, Y, lt, trueV, falseV)
5720     // Sometimes the setcc is introduced after select_cc has been formed.
5721     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5722         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
5723       // If we're looking for eq 0 instead of ne 0, we need to invert the
5724       // condition.
5725       bool Invert = CCVal == ISD::SETEQ;
5726       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5727       if (Invert)
5728         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5729 
5730       SDLoc DL(N);
5731       RHS = LHS.getOperand(1);
5732       LHS = LHS.getOperand(0);
5733       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5734 
5735       SDValue TargetCC =
5736           DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
5737       return DAG.getNode(
5738           RISCVISD::SELECT_CC, DL, N->getValueType(0),
5739           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
5740     }
5741 
5742     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
5743     //      (select_cc X, Y, eq/ne, trueV, falseV)
5744     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
5745       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
5746                          {LHS.getOperand(0), LHS.getOperand(1),
5747                           N->getOperand(2), N->getOperand(3),
5748                           N->getOperand(4)});
5749     // (select_cc X, 1, setne, trueV, falseV) ->
5750     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
5751     // This can occur when legalizing some floating point comparisons.
5752     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5753     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5754       SDLoc DL(N);
5755       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5756       SDValue TargetCC =
5757           DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
5758       RHS = DAG.getConstant(0, DL, LHS.getValueType());
5759       return DAG.getNode(
5760           RISCVISD::SELECT_CC, DL, N->getValueType(0),
5761           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
5762     }
5763 
5764     break;
5765   }
5766   case RISCVISD::BR_CC: {
5767     SDValue LHS = N->getOperand(1);
5768     SDValue RHS = N->getOperand(2);
5769     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
5770     if (!ISD::isIntEqualitySetCC(CCVal))
5771       break;
5772 
5773     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
5774     //      (br_cc X, Y, lt, dest)
5775     // Sometimes the setcc is introduced after br_cc has been formed.
5776     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5777         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
5778       // If we're looking for eq 0 instead of ne 0, we need to invert the
5779       // condition.
5780       bool Invert = CCVal == ISD::SETEQ;
5781       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5782       if (Invert)
5783         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5784 
5785       SDLoc DL(N);
5786       RHS = LHS.getOperand(1);
5787       LHS = LHS.getOperand(0);
5788       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5789 
5790       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
5791                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
5792                          N->getOperand(4));
5793     }
5794 
5795     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
5796     //      (br_cc X, Y, eq/ne, trueV, falseV)
5797     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
5798       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
5799                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
5800                          N->getOperand(3), N->getOperand(4));
5801 
5802     // (br_cc X, 1, setne, br_cc) ->
5803     // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
5804     // This can occur when legalizing some floating point comparisons.
5805     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5806     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5807       SDLoc DL(N);
5808       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5809       SDValue TargetCC = DAG.getCondCode(CCVal);
5810       RHS = DAG.getConstant(0, DL, LHS.getValueType());
5811       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
5812                          N->getOperand(0), LHS, RHS, TargetCC,
5813                          N->getOperand(4));
5814     }
5815     break;
5816   }
5817   case ISD::FCOPYSIGN: {
5818     EVT VT = N->getValueType(0);
5819     if (!VT.isVector())
5820       break;
5821     // There is a form of VFSGNJ which injects the negated sign of its second
5822     // operand. Try and bubble any FNEG up after the extend/round to produce
5823     // this optimized pattern. Avoid modifying cases where FP_ROUND and
5824     // TRUNC=1.
5825     SDValue In2 = N->getOperand(1);
5826     // Avoid cases where the extend/round has multiple uses, as duplicating
5827     // those is typically more expensive than removing a fneg.
5828     if (!In2.hasOneUse())
5829       break;
5830     if (In2.getOpcode() != ISD::FP_EXTEND &&
5831         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
5832       break;
5833     In2 = In2.getOperand(0);
5834     if (In2.getOpcode() != ISD::FNEG)
5835       break;
5836     SDLoc DL(N);
5837     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
5838     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
5839                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
5840   }
5841   case ISD::MGATHER:
5842   case ISD::MSCATTER: {
5843     if (!DCI.isBeforeLegalize())
5844       break;
5845     MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
5846     SDValue Index = MGSN->getIndex();
5847     EVT IndexVT = Index.getValueType();
5848     MVT XLenVT = Subtarget.getXLenVT();
5849     // RISCV indexed loads only support the "unsigned unscaled" addressing
5850     // mode, so anything else must be manually legalized.
5851     bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
5852                                 (MGSN->isIndexSigned() &&
5853                                  IndexVT.getVectorElementType().bitsLT(XLenVT));
5854     if (!NeedsIdxLegalization)
5855       break;
5856 
5857     SDLoc DL(N);
5858 
5859     // Any index legalization should first promote to XLenVT, so we don't lose
5860     // bits when scaling. This may create an illegal index type so we let
5861     // LLVM's legalization take care of the splitting.
5862     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
5863       IndexVT = IndexVT.changeVectorElementType(XLenVT);
5864       Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
5865                                                 : ISD::ZERO_EXTEND,
5866                           DL, IndexVT, Index);
5867     }
5868 
5869     unsigned Scale = N->getConstantOperandVal(5);
5870     if (MGSN->isIndexScaled() && Scale != 1) {
5871       // Manually scale the indices by the element size.
5872       // TODO: Sanitize the scale operand here?
5873       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
5874       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
5875       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
5876     }
5877 
5878     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
5879     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
5880       return DAG.getMaskedGather(
5881           N->getVTList(), MGSN->getMemoryVT(), DL,
5882           {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
5883            MGSN->getBasePtr(), Index, MGN->getScale()},
5884           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
5885     }
5886     const auto *MSN = cast<MaskedScatterSDNode>(N);
5887     return DAG.getMaskedScatter(
5888         N->getVTList(), MGSN->getMemoryVT(), DL,
5889         {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
5890          Index, MGSN->getScale()},
5891         MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
5892   }
5893   case RISCVISD::SRA_VL:
5894   case RISCVISD::SRL_VL:
5895   case RISCVISD::SHL_VL: {
5896     SDValue ShAmt = N->getOperand(1);
5897     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
5898       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
5899       SDLoc DL(N);
5900       SDValue VL = N->getOperand(3);
5901       EVT VT = N->getValueType(0);
5902       ShAmt =
5903           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
5904       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
5905                          N->getOperand(2), N->getOperand(3));
5906     }
5907     break;
5908   }
5909   case ISD::SRA:
5910   case ISD::SRL:
5911   case ISD::SHL: {
5912     SDValue ShAmt = N->getOperand(1);
5913     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
5914       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
5915       SDLoc DL(N);
5916       EVT VT = N->getValueType(0);
5917       ShAmt =
5918           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
5919       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
5920     }
5921     break;
5922   }
5923   }
5924 
5925   return SDValue();
5926 }
5927 
5928 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
5929     const SDNode *N, CombineLevel Level) const {
5930   // The following folds are only desirable if `(OP _, c1 << c2)` can be
5931   // materialised in fewer instructions than `(OP _, c1)`:
5932   //
5933   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5934   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
5935   SDValue N0 = N->getOperand(0);
5936   EVT Ty = N0.getValueType();
5937   if (Ty.isScalarInteger() &&
5938       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
5939     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
5940     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
5941     if (C1 && C2) {
5942       const APInt &C1Int = C1->getAPIntValue();
5943       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
5944 
5945       // We can materialise `c1 << c2` into an add immediate, so it's "free",
5946       // and the combine should happen, to potentially allow further combines
5947       // later.
5948       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
5949           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
5950         return true;
5951 
5952       // We can materialise `c1` in an add immediate, so it's "free", and the
5953       // combine should be prevented.
5954       if (C1Int.getMinSignedBits() <= 64 &&
5955           isLegalAddImmediate(C1Int.getSExtValue()))
5956         return false;
5957 
5958       // Neither constant will fit into an immediate, so find materialisation
5959       // costs.
5960       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
5961                                               Subtarget.is64Bit());
5962       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
5963           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
5964 
5965       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
5966       // combine should be prevented.
5967       if (C1Cost < ShiftedC1Cost)
5968         return false;
5969     }
5970   }
5971   return true;
5972 }
5973 
5974 bool RISCVTargetLowering::targetShrinkDemandedConstant(
5975     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
5976     TargetLoweringOpt &TLO) const {
5977   // Delay this optimization as late as possible.
5978   if (!TLO.LegalOps)
5979     return false;
5980 
5981   EVT VT = Op.getValueType();
5982   if (VT.isVector())
5983     return false;
5984 
5985   // Only handle AND for now.
5986   if (Op.getOpcode() != ISD::AND)
5987     return false;
5988 
5989   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5990   if (!C)
5991     return false;
5992 
5993   const APInt &Mask = C->getAPIntValue();
5994 
5995   // Clear all non-demanded bits initially.
5996   APInt ShrunkMask = Mask & DemandedBits;
5997 
5998   // Try to make a smaller immediate by setting undemanded bits.
5999 
6000   APInt ExpandedMask = Mask | ~DemandedBits;
6001 
6002   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
6003     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
6004   };
6005   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
6006     if (NewMask == Mask)
6007       return true;
6008     SDLoc DL(Op);
6009     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
6010     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
6011     return TLO.CombineTo(Op, NewOp);
6012   };
6013 
6014   // If the shrunk mask fits in sign extended 12 bits, let the target
6015   // independent code apply it.
6016   if (ShrunkMask.isSignedIntN(12))
6017     return false;
6018 
6019   // Preserve (and X, 0xffff) when zext.h is supported.
6020   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
6021     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
6022     if (IsLegalMask(NewMask))
6023       return UseMask(NewMask);
6024   }
6025 
6026   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
6027   if (VT == MVT::i64) {
6028     APInt NewMask = APInt(64, 0xffffffff);
6029     if (IsLegalMask(NewMask))
6030       return UseMask(NewMask);
6031   }
6032 
6033   // For the remaining optimizations, we need to be able to make a negative
6034   // number through a combination of mask and undemanded bits.
6035   if (!ExpandedMask.isNegative())
6036     return false;
6037 
6038   // What is the fewest number of bits we need to represent the negative number.
6039   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
6040 
6041   // Try to make a 12 bit negative immediate. If that fails try to make a 32
6042   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
6043   APInt NewMask = ShrunkMask;
6044   if (MinSignedBits <= 12)
6045     NewMask.setBitsFrom(11);
6046   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
6047     NewMask.setBitsFrom(31);
6048   else
6049     return false;
6050 
6051   // Sanity check that our new mask is a subset of the demanded mask.
6052   assert(IsLegalMask(NewMask));
6053   return UseMask(NewMask);
6054 }
6055 
// Compute known-zero/known-one bits for RISCV-specific DAG nodes so generic
// combines (e.g. MaskedValueIsZero) can reason about them.
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  // Default to "nothing known"; each case below fills in what it can prove.
  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    // The result is one of the two value operands (operand 3 = true value,
    // operand 4 = false value), so a bit is known only if it agrees in both.
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    // A count of trailing zeros in a 32-bit value is at most 32, so the
    // result fits in Log2_32(maximum possible count) + 1 bits; everything
    // above that is known zero.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = Log2_32(PossibleTZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    // Same bound as CTZW, but for leading zeros of the low 32 bits.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = Log2_32(PossibleLZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::READ_VLENB:
    // We assume VLENB is at least 16 bytes.
    Known.Zero.setLowBits(4);
    break;
  case ISD::INTRINSIC_W_CHAIN: {
    // Operand 0 is the chain, so the intrinsic ID is operand 1.
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)
        Known.Zero.setBitsFrom(31);
      break;
    }
    break;
  }
  }
}
6140 
// Report a lower bound on the number of sign bits of RISCV-specific nodes.
// Returning 1 means "nothing known beyond the sign bit itself".
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVW:
  case RISCVISD::GORCW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW:
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW:
    // These *W nodes produce a 32-bit result sign-extended to 64 bits, so
    // bits 63..31 are all copies of the sign bit: 33 sign bits.
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        isa<ConstantSDNode>(Op.getOperand(1)) &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
      return 1;
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  return 1;
}
6195 
// Expand the ReadCycleWide pseudo into a retry loop that reads the 64-bit
// cycle CSR pair atomically on a 32-bit target. Returns the block that
// subsequent expansion should continue in.
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the three CSR reads plus the retry branch; DoneMBB takes
  // over the instructions that followed the pseudo in BB.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1 = x0 reads a CSR without modifying it. The CYCLE/CYCLEH
  // name lookups are assumed to succeed for these architecturally-defined
  // registers.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the high word changed between the two CYCLEH reads (wraparound
  // of the low word happened mid-sequence).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
6257 
// Expand SplitF64Pseudo: move an FPR64 value into a pair of GPRs (Lo, Hi) by
// spilling it to a stack slot and reloading the two 32-bit halves with LW
// (low word at offset 0, high word at offset 4).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // One frame index is shared by all F64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  // Spill the 64-bit FP source, then reload each half into its GPR.
  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
6290 
// Expand BuildPairF64Pseudo: combine a pair of GPRs (Lo, Hi) into one FPR64
// value by storing the two 32-bit halves to a stack slot with SW (low word
// at offset 0, high word at offset 4) and reloading the slot as an FPR64.
// This is the inverse of emitSplitF64Pseudo.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // One frame index is shared by all F64<->GPR-pair moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  // Reload both halves in one go as the 64-bit FP destination.
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
6325 
6326 static bool isSelectPseudo(MachineInstr &MI) {
6327   switch (MI.getOpcode()) {
6328   default:
6329     return false;
6330   case RISCV::Select_GPR_Using_CC_GPR:
6331   case RISCV::Select_FPR16_Using_CC_GPR:
6332   case RISCV::Select_FPR32_Using_CC_GPR:
6333   case RISCV::Select_FPR64_Using_CC_GPR:
6334     return true;
6335   }
6336 }
6337 
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_* pseudo operand layout (see the PHI emission below):
  //   (0) dest, (1) LHS, (2) RHS, (3) cond-code imm, (4) TrueV, (5) FalseV.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI to find the last select pseudo that can share this
  // branch, applying the safety conditions described above.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // Stop at the first select with a different condition, or whose
      // TrueV/FalseV reads the result of an earlier select in the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // A non-select instruction ends the sequence unless it is side-effect
      // free, memory-access free, and independent of all select results.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  // Branch to TailMBB when the condition holds (i.e. select the true value).
  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHIs were introduced above, so the function no longer satisfies NoPHIs.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
6460 
6461 MachineBasicBlock *
6462 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6463                                                  MachineBasicBlock *BB) const {
6464   switch (MI.getOpcode()) {
6465   default:
6466     llvm_unreachable("Unexpected instr type to insert");
6467   case RISCV::ReadCycleWide:
6468     assert(!Subtarget.is64Bit() &&
6469            "ReadCycleWrite is only to be used on riscv32");
6470     return emitReadCycleWidePseudo(MI, BB);
6471   case RISCV::Select_GPR_Using_CC_GPR:
6472   case RISCV::Select_FPR16_Using_CC_GPR:
6473   case RISCV::Select_FPR32_Using_CC_GPR:
6474   case RISCV::Select_FPR64_Using_CC_GPR:
6475     return emitSelectPseudo(MI, BB);
6476   case RISCV::BuildPairF64Pseudo:
6477     return emitBuildPairF64Pseudo(MI, BB);
6478   case RISCV::SplitF64Pseudo:
6479     return emitSplitF64Pseudo(MI, BB);
6480   }
6481 }
6482 
6483 // Calling Convention Implementation.
6484 // The expectations for frontend ABI lowering vary from target to target.
6485 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
6486 // details, but this is a longer term goal. For now, we simply try to keep the
6487 // role of the frontend as simple and well-defined as possible. The rules can
6488 // be summarised as:
6489 // * Never split up large scalar arguments. We handle them here.
6490 // * If a hardfloat calling convention is being used, and the struct may be
6491 // passed in a pair of registers (fp+fp, int+fp), and both registers are
6492 // available, then pass as two separate arguments. If either the GPRs or FPRs
6493 // are exhausted, then pass according to the rule below.
6494 // * If a struct could never be passed in registers or directly in a stack
6495 // slot (as it is larger than 2*XLEN and the floating point rules don't
6496 // apply), then pass it using a pointer with the byval attribute.
6497 // * If a struct is less than 2*XLEN, then coerce to either a two-element
6498 // word-sized array or a 2*XLEN scalar (depending on alignment).
6499 // * The frontend can determine whether a struct is returned by reference or
6500 // not based on its size and fields. If it will be returned by reference, the
6501 // frontend must modify the prototype so a pointer with the sret annotation is
6502 // passed as the first argument. This is not necessary for large scalar
6503 // returns.
6504 // * Struct return values and varargs should be coerced to structs containing
6505 // register-size fields in the same situations they would be for fixed
6506 // arguments.
6507 
// Integer argument registers x10-x17 (a0-a7 in the ABI naming).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// Half-precision views of the FP argument registers f10-f17 (fa0-fa7).
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
// Single-precision views of the FP argument registers f10-f17 (fa0-fa7).
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
// Double-precision views of the FP argument registers f10-f17 (fa0-fa7).
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
// LMUL=2, LMUL=4 and LMUL=8 register groups over the same v8-v23 range.
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
6535 
6536 // Pass a 2*XLEN argument that has been split into two XLEN values through
6537 // registers or the stack as necessary.
6538 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
6539                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
6540                                 MVT ValVT2, MVT LocVT2,
6541                                 ISD::ArgFlagsTy ArgFlags2) {
6542   unsigned XLenInBytes = XLen / 8;
6543   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6544     // At least one half can be passed via register.
6545     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
6546                                      VA1.getLocVT(), CCValAssign::Full));
6547   } else {
6548     // Both halves must be passed on the stack, with proper alignment.
6549     Align StackAlign =
6550         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
6551     State.addLoc(
6552         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
6553                             State.AllocateStack(XLenInBytes, StackAlign),
6554                             VA1.getLocVT(), CCValAssign::Full));
6555     State.addLoc(CCValAssign::getMem(
6556         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
6557         LocVT2, CCValAssign::Full));
6558     return false;
6559   }
6560 
6561   if (Register Reg = State.AllocateReg(ArgGPRs)) {
6562     // The second half can also be passed via register.
6563     State.addLoc(
6564         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
6565   } else {
6566     // The second half is passed via the stack, without additional alignment.
6567     State.addLoc(CCValAssign::getMem(
6568         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
6569         LocVT2, CCValAssign::Full));
6570   }
6571 
6572   return false;
6573 }
6574 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// State accumulates the register/stack assignments. IsFixed is false for
// variadic arguments, IsRet is true when assigning return values, OrigTy is
// the original IR type (used for the variadic register-alignment rule), and
// FirstMaskArgument identifies the i1-vector argument that should be
// pre-assigned to V0, if any.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  // Checking one bank is therefore enough to know all are exhausted.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // FP values routed through GPRs are bit-converted to the integer type.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs at all: the whole f64 lives in an 8-byte stack slot.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve the second GPR (or a 4-byte stack slot) for the high half; the
    // single recorded location is the low half's register.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    // Pick the vector register list matching the value's register class
    // (LMUL=1/2/4/8 groups).
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
      // This is an interim calling convention and it may be changed in the
      // future.
      if (FirstMaskArgument.hasValue() &&
          ValNo == FirstMaskArgument.getValue()) {
        Reg = State.AllocateReg(RISCV::V0);
      } else {
        Reg = State.AllocateReg(ArgVRs);
      }
    } else if (RC == &RISCV::VRM2RegClass) {
      Reg = State.AllocateReg(ArgVRM2s);
    } else if (RC == &RISCV::VRM4RegClass) {
      Reg = State.AllocateReg(ArgVRM4s);
    } else if (RC == &RISCV::VRM8RegClass) {
      Reg = State.AllocateReg(ArgVRM8s);
    } else {
      llvm_unreachable("Unhandled class register for ValueType");
    }
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        report_fatal_error("Unable to pass scalable vector types on the stack");
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All parts share the one register (or stack slot) holding the address.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
6808 
6809 template <typename ArgTy>
6810 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
6811   for (const auto &ArgIdx : enumerate(Args)) {
6812     MVT ArgVT = ArgIdx.value().VT;
6813     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
6814       return ArgIdx.index();
6815   }
6816   return None;
6817 }
6818 
6819 void RISCVTargetLowering::analyzeInputArgs(
6820     MachineFunction &MF, CCState &CCInfo,
6821     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
6822   unsigned NumArgs = Ins.size();
6823   FunctionType *FType = MF.getFunction().getFunctionType();
6824 
6825   Optional<unsigned> FirstMaskArgument;
6826   if (Subtarget.hasStdExtV())
6827     FirstMaskArgument = preAssignMask(Ins);
6828 
6829   for (unsigned i = 0; i != NumArgs; ++i) {
6830     MVT ArgVT = Ins[i].VT;
6831     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
6832 
6833     Type *ArgTy = nullptr;
6834     if (IsRet)
6835       ArgTy = FType->getReturnType();
6836     else if (Ins[i].isOrigArg())
6837       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
6838 
6839     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
6840     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
6841                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
6842                  FirstMaskArgument)) {
6843       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
6844                         << EVT(ArgVT).getEVTString() << '\n');
6845       llvm_unreachable(nullptr);
6846     }
6847   }
6848 }
6849 
6850 void RISCVTargetLowering::analyzeOutputArgs(
6851     MachineFunction &MF, CCState &CCInfo,
6852     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
6853     CallLoweringInfo *CLI) const {
6854   unsigned NumArgs = Outs.size();
6855 
6856   Optional<unsigned> FirstMaskArgument;
6857   if (Subtarget.hasStdExtV())
6858     FirstMaskArgument = preAssignMask(Outs);
6859 
6860   for (unsigned i = 0; i != NumArgs; i++) {
6861     MVT ArgVT = Outs[i].VT;
6862     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6863     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
6864 
6865     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
6866     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
6867                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
6868                  FirstMaskArgument)) {
6869       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
6870                         << EVT(ArgVT).getEVTString() << "\n");
6871       llvm_unreachable(nullptr);
6872     }
6873   }
6874 }
6875 
6876 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
6877 // values.
6878 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
6879                                    const CCValAssign &VA, const SDLoc &DL,
6880                                    const RISCVSubtarget &Subtarget) {
6881   switch (VA.getLocInfo()) {
6882   default:
6883     llvm_unreachable("Unexpected CCValAssign::LocInfo");
6884   case CCValAssign::Full:
6885     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
6886       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
6887     break;
6888   case CCValAssign::BCvt:
6889     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
6890       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
6891     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6892       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
6893     else
6894       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
6895     break;
6896   }
6897   return Val;
6898 }
6899 
6900 // The caller is responsible for loading the full value if the argument is
6901 // passed with CCValAssign::Indirect.
6902 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
6903                                 const CCValAssign &VA, const SDLoc &DL,
6904                                 const RISCVTargetLowering &TLI) {
6905   MachineFunction &MF = DAG.getMachineFunction();
6906   MachineRegisterInfo &RegInfo = MF.getRegInfo();
6907   EVT LocVT = VA.getLocVT();
6908   SDValue Val;
6909   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
6910   Register VReg = RegInfo.createVirtualRegister(RC);
6911   RegInfo.addLiveIn(VA.getLocReg(), VReg);
6912   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
6913 
6914   if (VA.getLocInfo() == CCValAssign::Indirect)
6915     return Val;
6916 
6917   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
6918 }
6919 
6920 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
6921                                    const CCValAssign &VA, const SDLoc &DL,
6922                                    const RISCVSubtarget &Subtarget) {
6923   EVT LocVT = VA.getLocVT();
6924 
6925   switch (VA.getLocInfo()) {
6926   default:
6927     llvm_unreachable("Unexpected CCValAssign::LocInfo");
6928   case CCValAssign::Full:
6929     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
6930       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
6931     break;
6932   case CCValAssign::BCvt:
6933     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
6934       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
6935     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6936       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
6937     else
6938       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
6939     break;
6940   }
6941   return Val;
6942 }
6943 
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  // Create a fixed stack object covering the incoming argument's slot.
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  // Every LocInfo handled here loads the full value (NON_EXTLOAD); the
  // switch exists so any new extending LocInfo kind fails loudly.
  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
6973 
// Reassemble an incoming f64 argument on RV32 when it travels through GPRs
// and/or the stack (soft-float ABI, or FP argument registers exhausted).
// Per the assignments produced by CC_RISCV, the value arrives either fully
// on the stack, in a GPR pair, or split between a GPR and the stack.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // Low half arrives in the assigned GPR.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    // X17 is the last argument GPR, so the high half sits at offset 0 of the
    // incoming argument area.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Glue the two 32-bit halves back into a single f64 value.
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
7010 
// FastCC has shown less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have benefits for some cases.
7013 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
7014                             CCValAssign::LocInfo LocInfo,
7015                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
7016 
7017   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7018     // X5 and X6 might be used for save-restore libcall.
7019     static const MCPhysReg GPRList[] = {
7020         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
7021         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
7022         RISCV::X29, RISCV::X30, RISCV::X31};
7023     if (unsigned Reg = State.AllocateReg(GPRList)) {
7024       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7025       return false;
7026     }
7027   }
7028 
7029   if (LocVT == MVT::f16) {
7030     static const MCPhysReg FPR16List[] = {
7031         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
7032         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
7033         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
7034         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
7035     if (unsigned Reg = State.AllocateReg(FPR16List)) {
7036       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7037       return false;
7038     }
7039   }
7040 
7041   if (LocVT == MVT::f32) {
7042     static const MCPhysReg FPR32List[] = {
7043         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
7044         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
7045         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
7046         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
7047     if (unsigned Reg = State.AllocateReg(FPR32List)) {
7048       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7049       return false;
7050     }
7051   }
7052 
7053   if (LocVT == MVT::f64) {
7054     static const MCPhysReg FPR64List[] = {
7055         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
7056         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
7057         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
7058         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
7059     if (unsigned Reg = State.AllocateReg(FPR64List)) {
7060       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7061       return false;
7062     }
7063   }
7064 
7065   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
7066     unsigned Offset4 = State.AllocateStack(4, Align(4));
7067     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
7068     return false;
7069   }
7070 
7071   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
7072     unsigned Offset5 = State.AllocateStack(8, Align(8));
7073     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
7074     return false;
7075   }
7076 
7077   return true; // CC didn't match.
7078 }
7079 
7080 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7081                          CCValAssign::LocInfo LocInfo,
7082                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
7083 
7084   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7085     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
7086     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
7087     static const MCPhysReg GPRList[] = {
7088         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
7089         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
7090     if (unsigned Reg = State.AllocateReg(GPRList)) {
7091       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7092       return false;
7093     }
7094   }
7095 
7096   if (LocVT == MVT::f32) {
7097     // Pass in STG registers: F1, ..., F6
7098     //                        fs0 ... fs5
7099     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
7100                                           RISCV::F18_F, RISCV::F19_F,
7101                                           RISCV::F20_F, RISCV::F21_F};
7102     if (unsigned Reg = State.AllocateReg(FPR32List)) {
7103       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7104       return false;
7105     }
7106   }
7107 
7108   if (LocVT == MVT::f64) {
7109     // Pass in STG registers: D1, ..., D6
7110     //                        fs6 ... fs11
7111     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
7112                                           RISCV::F24_D, RISCV::F25_D,
7113                                           RISCV::F26_D, RISCV::F27_D};
7114     if (unsigned Reg = State.AllocateReg(FPR64List)) {
7115       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7116       return false;
7117     }
7118   }
7119 
7120   report_fatal_error("No registers left in GHC calling convention");
7121   return true;
7122 }
7123 
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Only the C, Fast and GHC calling conventions are supported here.
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // CC_RISCV_GHC assigns FP values to F/D registers unconditionally, so
    // both extensions must be present.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  // Interrupt handlers cannot receive arguments, and only the three known
  // privilege kinds are accepted.
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Consume every remaining part that belongs to the same original IR
      // argument; note this advances the outer loop index.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        // Scalable-vector part offsets must be scaled by vscale at runtime.
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Drop the IR Value from the memory operand; this save slot has no
      // corresponding IR-level object.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
7279 
7280 /// isEligibleForTailCallOptimization - Check whether the call is eligible
7281 /// for tail call optimization.
7282 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
7283 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
7284     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7285     const SmallVector<CCValAssign, 16> &ArgLocs) const {
7286 
7287   auto &Callee = CLI.Callee;
7288   auto CalleeCC = CLI.CallConv;
7289   auto &Outs = CLI.Outs;
7290   auto &Caller = MF.getFunction();
7291   auto CallerCC = Caller.getCallingConv();
7292 
7293   // Exception-handling functions need a special set of instructions to
7294   // indicate a return to the hardware. Tail-calling another function would
7295   // probably break this.
7296   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
7297   // should be expanded as new function attributes are introduced.
7298   if (Caller.hasFnAttribute("interrupt"))
7299     return false;
7300 
7301   // Do not tail call opt if the stack is used to pass parameters.
7302   if (CCInfo.getNextStackOffset() != 0)
7303     return false;
7304 
7305   // Do not tail call opt if any parameters need to be passed indirectly.
7306   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
7307   // passed indirectly. So the address of the value will be passed in a
7308   // register, or if not available, then the address is put on the stack. In
7309   // order to pass indirectly, space on the stack often needs to be allocated
7310   // in order to store the value. In this case the CCInfo.getNextStackOffset()
7311   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
7312   // are passed CCValAssign::Indirect.
7313   for (auto &VA : ArgLocs)
7314     if (VA.getLocInfo() == CCValAssign::Indirect)
7315       return false;
7316 
7317   // Do not tail call opt if either caller or callee uses struct return
7318   // semantics.
7319   auto IsCallerStructRet = Caller.hasStructRetAttr();
7320   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7321   if (IsCallerStructRet || IsCalleeStructRet)
7322     return false;
7323 
7324   // Externally-defined functions with weak linkage should not be
7325   // tail-called. The behaviour of branch instructions in this situation (as
7326   // used for tail calls) is implementation-defined, so we cannot rely on the
7327   // linker replacing the tail call with a return.
7328   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
7329     const GlobalValue *GV = G->getGlobal();
7330     if (GV->hasExternalWeakLinkage())
7331       return false;
7332   }
7333 
7334   // The callee has to preserve all registers the caller needs to preserve.
7335   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
7336   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7337   if (CalleeCC != CallerCC) {
7338     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7339     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7340       return false;
7341   }
7342 
7343   // Byval parameters hand the function a pointer directly into the stack area
7344   // we want to reuse during a tail call. Working around this *is* possible
7345   // but less efficient and uglier in LowerCall.
7346   for (auto &Arg : Outs)
7347     if (Arg.Flags.isByVal())
7348       return false;
7349 
7350   return true;
7351 }
7352 
7353 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7354   return DAG.getDataLayout().getPrefTypeAlign(
7355       VT.getTypeForEVT(*DAG.getContext()));
7356 }
7357 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    // Allocate a stack object for the copy and memcpy the caller's data into
    // it; the copy's address is what gets passed.
    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // Tail calls must not adjust the stack, so no callseq markers are emitted.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      // Gather all remaining parts of the same original argument; note this
      // advances the outer loop index.
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        // Scalable-vector part offsets must be scaled by vscale at runtime.
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      // Store the first part at the slot base, then each further part at its
      // computed offset from the base.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      // The address of the spill slot is what is actually passed.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    // Non-dso-local callees go via the PLT.
    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    // A tail call is its own return; no callseq_end or result copies follow.
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // An f64 returned on RV32 soft-float comes back as a pair of GPRs that
    // must be rejoined.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
7659 
7660 bool RISCVTargetLowering::CanLowerReturn(
7661     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7662     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
7663   SmallVector<CCValAssign, 16> RVLocs;
7664   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7665 
7666   Optional<unsigned> FirstMaskArgument;
7667   if (Subtarget.hasStdExtV())
7668     FirstMaskArgument = preAssignMask(Outs);
7669 
7670   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7671     MVT VT = Outs[i].VT;
7672     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
7673     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
7674     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
7675                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
7676                  *this, FirstMaskArgument))
7677       return false;
7678   }
7679   return true;
7680 }
7681 
// Lower the outgoing return values: copy each value into its assigned
// register(s), glue the copies together, and emit the appropriate return
// node (including the interrupt-return variants).
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  // RetOps[0] is the chain; updated after all copies are emitted.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI: split the f64
      // into two i32 halves returned in an even/odd GPR pair.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Glue the two copies so they stay adjacent in the final schedule.
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select the privilege-level-specific return opcode; "machine" is the
    // default for any other accepted kind.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
7783 
7784 void RISCVTargetLowering::validateCCReservedRegs(
7785     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
7786     MachineFunction &MF) const {
7787   const Function &F = MF.getFunction();
7788   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
7789 
7790   if (llvm::any_of(Regs, [&STI](auto Reg) {
7791         return STI.isRegisterReservedByUser(Reg.first);
7792       }))
7793     F.getContext().diagnose(DiagnosticInfoUnsupported{
7794         F, "Argument register required, but has been reserved."});
7795 }
7796 
7797 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7798   return CI->isTailCall();
7799 }
7800 
// Return the textual name of a RISCVISD target-specific SelectionDAG opcode
// for debug dumps, or nullptr for opcodes this target does not name (so the
// generic printer can take over).
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  // Control flow / call and scalar arithmetic nodes.
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  // Bit-manipulation (B extension) nodes.
  NODE_NAME_CASE(GREV)
  NODE_NAME_CASE(GREVW)
  NODE_NAME_CASE(GORC)
  NODE_NAME_CASE(GORCW)
  NODE_NAME_CASE(SHFL)
  NODE_NAME_CASE(SHFLW)
  NODE_NAME_CASE(UNSHFL)
  NODE_NAME_CASE(UNSHFLW)
  NODE_NAME_CASE(BCOMPRESS)
  NODE_NAME_CASE(BCOMPRESSW)
  NODE_NAME_CASE(BDECOMPRESS)
  NODE_NAME_CASE(BDECOMPRESSW)
  // Vector (V extension) nodes; the _VL suffix denotes an explicit VL operand.
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(FMINNUM_VL)
  NODE_NAME_CASE(FMAXNUM_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VPOPC_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  // CSR access pseudo nodes.
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
7937 
7938 /// getConstraintType - Given a constraint letter, return the type of
7939 /// constraint it is for this target.
7940 RISCVTargetLowering::ConstraintType
7941 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
7942   if (Constraint.size() == 1) {
7943     switch (Constraint[0]) {
7944     default:
7945       break;
7946     case 'f':
7947     case 'v':
7948       return C_RegisterClass;
7949     case 'I':
7950     case 'J':
7951     case 'K':
7952       return C_Immediate;
7953     case 'A':
7954       return C_Memory;
7955     }
7956   }
7957   return TargetLowering::getConstraintType(Constraint);
7958 }
7959 
// Resolve an inline-asm register constraint ('r'/'f'/'v' or an explicit
// "{regname}") to a concrete register and/or register class. Unmatched
// constraints fall through to the generic TargetLowering implementation.
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // Pick the narrowest FP class whose element width matches VT, gated on
      // the corresponding extension being available.
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    case 'v':
      // Try mask, single, then grouped (LMUL 2/4/8) vector register classes
      // until one can legally hold VT.
      for (const auto *RC :
           {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
          return std::make_pair(0U, RC);
      }
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // With D available, translate the matched single-precision register to
      // the same-numbered double-precision register (the widest FP view).
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  if (Subtarget.hasStdExtV()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      // For grouped register classes, map the single register to the
      // register group it heads (e.g. v8 -> v8m2 for LMUL=2).
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
8137 
8138 unsigned
8139 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
8140   // Currently only support length 1 constraints.
8141   if (ConstraintCode.size() == 1) {
8142     switch (ConstraintCode[0]) {
8143     case 'A':
8144       return InlineAsm::Constraint_A;
8145     default:
8146       break;
8147     }
8148   }
8149 
8150   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
8151 }
8152 
8153 void RISCVTargetLowering::LowerAsmOperandForConstraint(
8154     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8155     SelectionDAG &DAG) const {
8156   // Currently only support length 1 constraints.
8157   if (Constraint.length() == 1) {
8158     switch (Constraint[0]) {
8159     case 'I':
8160       // Validate & create a 12-bit signed immediate operand.
8161       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8162         uint64_t CVal = C->getSExtValue();
8163         if (isInt<12>(CVal))
8164           Ops.push_back(
8165               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
8166       }
8167       return;
8168     case 'J':
8169       // Validate & create an integer zero operand.
8170       if (auto *C = dyn_cast<ConstantSDNode>(Op))
8171         if (C->getZExtValue() == 0)
8172           Ops.push_back(
8173               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
8174       return;
8175     case 'K':
8176       // Validate & create a 5-bit unsigned immediate operand.
8177       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8178         uint64_t CVal = C->getZExtValue();
8179         if (isUInt<5>(CVal))
8180           Ops.push_back(
8181               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
8182       }
8183       return;
8184     default:
8185       break;
8186     }
8187   }
8188   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8189 }
8190 
8191 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8192                                                    Instruction *Inst,
8193                                                    AtomicOrdering Ord) const {
8194   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
8195     return Builder.CreateFence(Ord);
8196   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
8197     return Builder.CreateFence(AtomicOrdering::Release);
8198   return nullptr;
8199 }
8200 
8201 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8202                                                     Instruction *Inst,
8203                                                     AtomicOrdering Ord) const {
8204   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
8205     return Builder.CreateFence(AtomicOrdering::Acquire);
8206   return nullptr;
8207 }
8208 
8209 TargetLowering::AtomicExpansionKind
8210 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8211   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
8212   // point operations can't be used in an lr/sc sequence without breaking the
8213   // forward-progress guarantee.
8214   if (AI->isFloatingPointOperation())
8215     return AtomicExpansionKind::CmpXChg;
8216 
8217   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8218   if (Size == 8 || Size == 16)
8219     return AtomicExpansionKind::MaskedIntrinsic;
8220   return AtomicExpansionKind::None;
8221 }
8222 
8223 static Intrinsic::ID
8224 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
8225   if (XLen == 32) {
8226     switch (BinOp) {
8227     default:
8228       llvm_unreachable("Unexpected AtomicRMW BinOp");
8229     case AtomicRMWInst::Xchg:
8230       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
8231     case AtomicRMWInst::Add:
8232       return Intrinsic::riscv_masked_atomicrmw_add_i32;
8233     case AtomicRMWInst::Sub:
8234       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
8235     case AtomicRMWInst::Nand:
8236       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
8237     case AtomicRMWInst::Max:
8238       return Intrinsic::riscv_masked_atomicrmw_max_i32;
8239     case AtomicRMWInst::Min:
8240       return Intrinsic::riscv_masked_atomicrmw_min_i32;
8241     case AtomicRMWInst::UMax:
8242       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
8243     case AtomicRMWInst::UMin:
8244       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
8245     }
8246   }
8247 
8248   if (XLen == 64) {
8249     switch (BinOp) {
8250     default:
8251       llvm_unreachable("Unexpected AtomicRMW BinOp");
8252     case AtomicRMWInst::Xchg:
8253       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
8254     case AtomicRMWInst::Add:
8255       return Intrinsic::riscv_masked_atomicrmw_add_i64;
8256     case AtomicRMWInst::Sub:
8257       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
8258     case AtomicRMWInst::Nand:
8259       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
8260     case AtomicRMWInst::Max:
8261       return Intrinsic::riscv_masked_atomicrmw_max_i64;
8262     case AtomicRMWInst::Min:
8263       return Intrinsic::riscv_masked_atomicrmw_min_i64;
8264     case AtomicRMWInst::UMax:
8265       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
8266     case AtomicRMWInst::UMin:
8267       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
8268     }
8269   }
8270 
8271   llvm_unreachable("Unexpected XLen\n");
8272 }
8273 
// Emit a call to the RISCV masked atomicrmw intrinsic used when expanding a
// sub-word (i8/i16) atomicrmw. AlignedAddr/Incr/Mask/ShiftAmt come from the
// generic masked-atomic expansion; the call returns the loaded word, which is
// truncated back to i32 on RV64.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The memory ordering is passed to the intrinsic as an explicit XLen-wide
  // integer argument.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 intrinsic variants expect their operands sign-extended to i64.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // The intrinsic result is XLen wide; narrow back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
8316 
8317 TargetLowering::AtomicExpansionKind
8318 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
8319     AtomicCmpXchgInst *CI) const {
8320   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8321   if (Size == 8 || Size == 16)
8322     return AtomicExpansionKind::MaskedIntrinsic;
8323   return AtomicExpansionKind::None;
8324 }
8325 
8326 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8327     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8328     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8329   unsigned XLen = Subtarget.getXLen();
8330   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
8331   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
8332   if (XLen == 64) {
8333     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8334     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8335     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8336     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
8337   }
8338   Type *Tys[] = {AlignedAddr->getType()};
8339   Function *MaskedCmpXchg =
8340       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
8341   Value *Result = Builder.CreateCall(
8342       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
8343   if (XLen == 64)
8344     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8345   return Result;
8346 }
8347 
8348 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
8349   return false;
8350 }
8351 
8352 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
8353                                                      EVT VT) const {
8354   VT = VT.getScalarType();
8355 
8356   if (!VT.isSimple())
8357     return false;
8358 
8359   switch (VT.getSimpleVT().SimpleTy) {
8360   case MVT::f16:
8361     return Subtarget.hasStdExtZfh();
8362   case MVT::f32:
8363     return Subtarget.hasStdExtF();
8364   case MVT::f64:
8365     return Subtarget.hasStdExtD();
8366   default:
8367     break;
8368   }
8369 
8370   return false;
8371 }
8372 
8373 Register RISCVTargetLowering::getExceptionPointerRegister(
8374     const Constant *PersonalityFn) const {
8375   return RISCV::X10;
8376 }
8377 
8378 Register RISCVTargetLowering::getExceptionSelectorRegister(
8379     const Constant *PersonalityFn) const {
8380   return RISCV::X11;
8381 }
8382 
8383 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8384   // Return false to suppress the unnecessary extensions if the LibCall
8385   // arguments or return value is f32 type for LP64 ABI.
8386   RISCVABI::ABI ABI = Subtarget.getTargetABI();
8387   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
8388     return false;
8389 
8390   return true;
8391 }
8392 
8393 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
8394   if (Subtarget.is64Bit() && Type == MVT::i32)
8395     return true;
8396 
8397   return IsSigned;
8398 }
8399 
8400 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
8401                                                  SDValue C) const {
8402   // Check integral scalar types.
8403   if (VT.isScalarInteger()) {
8404     // Omit the optimization if the sub target has the M extension and the data
8405     // size exceeds XLen.
8406     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
8407       return false;
8408     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8409       // Break the MUL to a SLLI and an ADD/SUB.
8410       const APInt &Imm = ConstNode->getAPIntValue();
8411       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8412           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8413         return true;
8414       // Omit the following optimization if the sub target has the M extension
8415       // and the data size >= XLen.
8416       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
8417         return false;
8418       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
8419       // a pair of LUI/ADDI.
8420       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
8421         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
8422         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
8423             (1 - ImmS).isPowerOf2())
8424         return true;
8425       }
8426     }
8427   }
8428 
8429   return false;
8430 }
8431 
8432 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
8433     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8434     bool *Fast) const {
8435   if (!VT.isVector())
8436     return false;
8437 
8438   EVT ElemVT = VT.getVectorElementType();
8439   if (Alignment >= ElemVT.getStoreSize()) {
8440     if (Fast)
8441       *Fast = true;
8442     return true;
8443   }
8444 
8445   return false;
8446 }
8447 
// Target hook to split Val into NumParts register-sized parts. Handles the
// two RISCV-specific cases: NaN-boxing an f16 ABI argument into an f32
// register, and placing a small scalable vector into a larger register-sized
// scalable vector. Returns false to fall back to the generic splitting.
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this is for passing/returning a value
  // in ABI registers rather than an arbitrary split.
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
    // and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Minimum (vscale=1) bit sizes; divisibility carries over to any vscale.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Place the (possibly bitcast) value at index 0 of an undef PartVT.
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()))
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
8489 
// Target hook that reverses splitValueIntoRegisterParts: reassemble a value
// of ValueVT from register-sized Parts. Handles un-NaN-boxing an f16 carried
// in an f32 register and extracting a small scalable vector from a larger
// register-sized one. Returns SDValue() to fall back to generic joining.
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this reassembles an ABI register copy.
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Minimum (vscale=1) bit sizes; divisibility carries over to any vscale.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      // Extract the low subvector, then bitcast back if the element types
      // were different.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}
8530 
8531 #define GET_REGISTER_MATCHER
8532 #include "RISCVGenAsmMatcher.inc"
8533 
8534 Register
8535 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8536                                        const MachineFunction &MF) const {
8537   Register Reg = MatchRegisterAltName(RegName);
8538   if (Reg == RISCV::NoRegister)
8539     Reg = MatchRegisterName(RegName);
8540   if (Reg == RISCV::NoRegister)
8541     report_fatal_error(
8542         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
8543   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8544   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
8545     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8546                              StringRef(RegName) + "\"."));
8547   return Reg;
8548 }
8549 
namespace llvm {
namespace RISCVVIntrinsicsTable {

// Expand the TableGen-generated implementation of the RISC-V vector
// intrinsics searchable table into this namespace.
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm
8559