1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
/// Construct the RISC-V lowering object and declare how every supported
/// operation/value-type pair is to be handled.
///
/// In order, the constructor:
///   - aborts for RV32E;
///   - checks the requested ABI against the F/D extensions, printing a message
///     to errs() and falling back to the integer ABI on a mismatch;
///   - registers the scalar (GPR/FPR) and, with the V extension, the RVV
///     register classes, then calls computeRegisterProperties();
///   - sets per-operation actions (Legal / Custom / Expand / Promote) for
///     scalar integer, scalar FP, scalable-vector and fixed-length-vector types;
///   - sets atomic sizes, boolean contents, function alignment, jump-table and
///     branch preferences, and the target DAG combines.
///
/// The statement order is significant in places: register classes are added
/// before computeRegisterProperties(), and the fixed-length vector loops first
/// mark every opcode Expand and only afterwards set selected opcodes to Custom.
///
/// \param TM  Target machine, forwarded to the TargetLowering base class.
/// \param STI Subtarget; stored in the Subtarget member and queried for the
///            ISA extensions, XLEN and register info used below.
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
     // A hard-float 'f'/'d' ABI needs the matching F/D extension. When it is
     // missing, report it on errs() and fall back to LP64 (64-bit) or ILP32.
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58                 "doesn't support the F instruction set extension (ignoring "
59                           "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
     // Only the six ABIs listed here are accepted; any other value is fatal.
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
     // Scalable vector types that receive RVV register classes and operation
     // actions further down (only when the V extension is present).
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
        // Choose the RVV register class from the type's known-minimum size in
        // bits: <= 64 -> VR, 128 -> VRM2, 256 -> VRM4, otherwise -> VRM8. The
        // assert restricts sizes to powers of two no larger than 512.
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
          // Choose the register class from the LMUL the subtarget reports for
          // this fixed-length type; i1-element (mask) vectors always use VR.
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147         const TargetRegisterClass *RC;
148         if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
149           RC = &RISCV::VRRegClass;
150         else if (LMul == 2)
151           RC = &RISCV::VRM2RegClass;
152         else if (LMul == 4)
153           RC = &RISCV::VRM4RegClass;
154         else if (LMul == 8)
155           RC = &RISCV::VRM8RegClass;
156         else
157           llvm_unreachable("Unexpected LMul!");
158 
159         addRegisterClass(VT, RC);
160       };
161       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162         if (useRVVForFixedLengthVectorVT(VT))
163           addRegClassForFixedVectors(VT);
164 
165       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166         if (useRVVForFixedLengthVectorVT(VT))
167           addRegClassForFixedVectors(VT);
168     }
169   }
170 
171   // Compute derived properties from the register classes.
172   computeRegisterProperties(STI.getRegisterInfo());
173 
174   setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
      // All three kinds of extending load from i1 are promoted.
176   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179   // TODO: add all necessary setOperationAction calls.
180   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
181 
182   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
183   setOperationAction(ISD::BR_CC, XLenVT, Expand);
184   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
185   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
186 
187   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
188   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
189 
190   setOperationAction(ISD::VASTART, MVT::Other, Custom);
191   setOperationAction(ISD::VAARG, MVT::Other, Expand);
192   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
193   setOperationAction(ISD::VAEND, MVT::Other, Expand);
194 
195   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
196   if (!Subtarget.hasStdExtZbb()) {
197     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
198     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
199   }
200 
201   if (Subtarget.is64Bit()) {
202     setOperationAction(ISD::ADD, MVT::i32, Custom);
203     setOperationAction(ISD::SUB, MVT::i32, Custom);
204     setOperationAction(ISD::SHL, MVT::i32, Custom);
205     setOperationAction(ISD::SRA, MVT::i32, Custom);
206     setOperationAction(ISD::SRL, MVT::i32, Custom);
207 
208     setOperationAction(ISD::UADDO, MVT::i32, Custom);
209     setOperationAction(ISD::USUBO, MVT::i32, Custom);
210     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
211     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
212   }
213 
      // Without the M extension, XLenVT multiply/divide/remainder are expanded.
      // With it, selected narrower or wider integer types are custom-handled.
214   if (!Subtarget.hasStdExtM()) {
215     setOperationAction(ISD::MUL, XLenVT, Expand);
216     setOperationAction(ISD::MULHS, XLenVT, Expand);
217     setOperationAction(ISD::MULHU, XLenVT, Expand);
218     setOperationAction(ISD::SDIV, XLenVT, Expand);
219     setOperationAction(ISD::UDIV, XLenVT, Expand);
220     setOperationAction(ISD::SREM, XLenVT, Expand);
221     setOperationAction(ISD::UREM, XLenVT, Expand);
222   } else {
223     if (Subtarget.is64Bit()) {
224       setOperationAction(ISD::MUL, MVT::i32, Custom);
225       setOperationAction(ISD::MUL, MVT::i128, Custom);
226 
227       setOperationAction(ISD::SDIV, MVT::i8, Custom);
228       setOperationAction(ISD::UDIV, MVT::i8, Custom);
229       setOperationAction(ISD::UREM, MVT::i8, Custom);
230       setOperationAction(ISD::SDIV, MVT::i16, Custom);
231       setOperationAction(ISD::UDIV, MVT::i16, Custom);
232       setOperationAction(ISD::UREM, MVT::i16, Custom);
233       setOperationAction(ISD::SDIV, MVT::i32, Custom);
234       setOperationAction(ISD::UDIV, MVT::i32, Custom);
235       setOperationAction(ISD::UREM, MVT::i32, Custom);
236     } else {
237       setOperationAction(ISD::MUL, MVT::i64, Custom);
238     }
239   }
240 
241   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
242   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
243   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
244   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
245 
246   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
247   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
248   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
249 
250   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
251     if (Subtarget.is64Bit()) {
252       setOperationAction(ISD::ROTL, MVT::i32, Custom);
253       setOperationAction(ISD::ROTR, MVT::i32, Custom);
254     }
255   } else {
256     setOperationAction(ISD::ROTL, XLenVT, Expand);
257     setOperationAction(ISD::ROTR, XLenVT, Expand);
258   }
259 
260   if (Subtarget.hasStdExtZbp()) {
261     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
262     // more combining.
263     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
264     setOperationAction(ISD::BSWAP, XLenVT, Custom);
265 
266     if (Subtarget.is64Bit()) {
267       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
268       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
269     }
270   } else {
271     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
272     // pattern match it directly in isel.
273     setOperationAction(ISD::BSWAP, XLenVT,
274                        Subtarget.hasStdExtZbb() ? Legal : Expand);
275   }
276 
277   if (Subtarget.hasStdExtZbb()) {
278     setOperationAction(ISD::SMIN, XLenVT, Legal);
279     setOperationAction(ISD::SMAX, XLenVT, Legal);
280     setOperationAction(ISD::UMIN, XLenVT, Legal);
281     setOperationAction(ISD::UMAX, XLenVT, Legal);
282 
283     if (Subtarget.is64Bit()) {
284       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
285       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
286       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
287       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
288     }
289   } else {
290     setOperationAction(ISD::CTTZ, XLenVT, Expand);
291     setOperationAction(ISD::CTLZ, XLenVT, Expand);
292     setOperationAction(ISD::CTPOP, XLenVT, Expand);
293   }
294 
295   if (Subtarget.hasStdExtZbt()) {
296     setOperationAction(ISD::FSHL, XLenVT, Custom);
297     setOperationAction(ISD::FSHR, XLenVT, Custom);
298     setOperationAction(ISD::SELECT, XLenVT, Legal);
299 
300     if (Subtarget.is64Bit()) {
301       setOperationAction(ISD::FSHL, MVT::i32, Custom);
302       setOperationAction(ISD::FSHR, MVT::i32, Custom);
303     }
304   } else {
305     setOperationAction(ISD::SELECT, XLenVT, Custom);
306   }
307 
      // Condition codes and FP operations that are marked Expand for every
      // enabled scalar FP type (f16, f32, f64) in the blocks below.
308   ISD::CondCode FPCCToExpand[] = {
309       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
310       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
311       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
312 
313   ISD::NodeType FPOpToExpand[] = {
314       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
315       ISD::FP_TO_FP16};
316 
317   if (Subtarget.hasStdExtZfh())
318     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
319 
320   if (Subtarget.hasStdExtZfh()) {
321     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
322     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
323     for (auto CC : FPCCToExpand)
324       setCondCodeAction(CC, MVT::f16, Expand);
325     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
326     setOperationAction(ISD::SELECT, MVT::f16, Custom);
327     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
328     for (auto Op : FPOpToExpand)
329       setOperationAction(Op, MVT::f16, Expand);
330   }
331 
332   if (Subtarget.hasStdExtF()) {
333     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
334     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
335     for (auto CC : FPCCToExpand)
336       setCondCodeAction(CC, MVT::f32, Expand);
337     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
338     setOperationAction(ISD::SELECT, MVT::f32, Custom);
339     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
340     for (auto Op : FPOpToExpand)
341       setOperationAction(Op, MVT::f32, Expand);
342     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
343     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
344   }
345 
346   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
347     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
348 
349   if (Subtarget.hasStdExtD()) {
350     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
351     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
352     for (auto CC : FPCCToExpand)
353       setCondCodeAction(CC, MVT::f64, Expand);
354     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
355     setOperationAction(ISD::SELECT, MVT::f64, Custom);
356     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
357     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
358     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
359     for (auto Op : FPOpToExpand)
360       setOperationAction(Op, MVT::f64, Expand);
361     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
362     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
363   }
364 
365   if (Subtarget.is64Bit()) {
366     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
367     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
368     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
369     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
370   }
371 
372   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
373   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
374   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
375   setOperationAction(ISD::JumpTable, XLenVT, Custom);
376 
377   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
378 
379   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
380   // Unfortunately this can't be determined just from the ISA naming string.
381   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
382                      Subtarget.is64Bit() ? Legal : Custom);
383 
384   setOperationAction(ISD::TRAP, MVT::Other, Legal);
385   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
386   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
387 
      // With the A extension the maximum supported atomic size is XLEN bits and
      // the minimum cmpxchg size is 32 bits; without it the maximum is set to 0.
388   if (Subtarget.hasStdExtA()) {
389     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
390     setMinCmpXchgSizeInBits(32);
391   } else {
392     setMaxAtomicSizeInBitsSupported(0);
393   }
394 
395   setBooleanContents(ZeroOrOneBooleanContent);
396 
397   if (Subtarget.hasStdExtV()) {
398     setBooleanVectorContents(ZeroOrOneBooleanContent);
399 
400     setOperationAction(ISD::VSCALE, XLenVT, Custom);
401 
402     // RVV intrinsics may have illegal operands.
403     // We also need to custom legalize vmv.x.s.
404     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
405     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
406     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
407     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
408     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
409     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
410     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
411     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
412 
413     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
414 
415     if (!Subtarget.is64Bit()) {
416       // We must custom-lower certain vXi64 operations on RV32 due to the vector
417       // element type being illegal.
418       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
419       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
420 
421       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
422       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
423       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
424       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
425       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
426       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
427       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
428       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
429     }
430 
431     for (MVT VT : BoolVecVTs) {
432       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
433 
434       // Mask VTs are custom-expanded into a series of standard nodes
435       setOperationAction(ISD::TRUNCATE, VT, Custom);
436       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
437       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
438 
439       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
440     }
441 
442     for (MVT VT : IntVecVTs) {
443       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
444       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
445 
446       setOperationAction(ISD::SMIN, VT, Legal);
447       setOperationAction(ISD::SMAX, VT, Legal);
448       setOperationAction(ISD::UMIN, VT, Legal);
449       setOperationAction(ISD::UMAX, VT, Legal);
450 
451       setOperationAction(ISD::ROTL, VT, Expand);
452       setOperationAction(ISD::ROTR, VT, Expand);
453 
454       // Custom-lower extensions and truncations from/to mask types.
455       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
456       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
457       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
458 
459       // RVV has native int->float & float->int conversions where the
460       // element type sizes are within one power-of-two of each other. Any
461       // wider distances between type sizes have to be lowered as sequences
462       // which progressively narrow the gap in stages.
463       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
464       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
465       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
466       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
467 
468       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
469       // nodes which truncate by one power of two at a time.
470       setOperationAction(ISD::TRUNCATE, VT, Custom);
471 
472       // Custom-lower insert/extract operations to simplify patterns.
473       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
474       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
475 
476       // Custom-lower reduction operations to set up the corresponding custom
477       // nodes' operands.
478       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
479       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
480       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
481       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
482       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
483       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
484       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
485       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
486 
487       setOperationAction(ISD::MLOAD, VT, Custom);
488       setOperationAction(ISD::MSTORE, VT, Custom);
489       setOperationAction(ISD::MGATHER, VT, Custom);
490       setOperationAction(ISD::MSCATTER, VT, Custom);
491 
492       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
493       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
494       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
495 
496       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
497       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
498     }
499 
500     // Expand various CCs to best match the RVV ISA, which natively supports UNE
501     // but no other unordered comparisons, and supports all ordered comparisons
502     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
503     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
504     // and we pattern-match those back to the "original", swapping operands once
505     // more. This way we catch both operations and both "vf" and "fv" forms with
506     // fewer patterns.
507     ISD::CondCode VFPCCToExpand[] = {
508         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
509         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
510         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
511     };
512 
513     // Sets common operation actions on RVV floating-point vector types.
514     const auto SetCommonVFPActions = [&](MVT VT) {
515       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
516       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
517       // sizes are within one power-of-two of each other. Therefore conversions
518       // between vXf16 and vXf64 must be lowered as sequences which convert via
519       // vXf32.
520       setOperationAction(ISD::FP_ROUND, VT, Custom);
521       setOperationAction(ISD::FP_EXTEND, VT, Custom);
522       // Custom-lower insert/extract operations to simplify patterns.
523       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
524       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
525       // Expand various condition codes (explained above).
526       for (auto CC : VFPCCToExpand)
527         setCondCodeAction(CC, VT, Expand);
528 
529       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
530       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
531       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
532 
533       setOperationAction(ISD::MLOAD, VT, Custom);
534       setOperationAction(ISD::MSTORE, VT, Custom);
535       setOperationAction(ISD::MGATHER, VT, Custom);
536       setOperationAction(ISD::MSCATTER, VT, Custom);
537 
538       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
539       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
540       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
541 
542       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
543     };
544 
545     if (Subtarget.hasStdExtZfh())
546       for (MVT VT : F16VecVTs)
547         SetCommonVFPActions(VT);
548 
549     if (Subtarget.hasStdExtF())
550       for (MVT VT : F32VecVTs)
551         SetCommonVFPActions(VT);
552 
553     if (Subtarget.hasStdExtD())
554       for (MVT VT : F64VecVTs)
555         SetCommonVFPActions(VT);
556 
557     if (Subtarget.useRVVForFixedLengthVectors()) {
558       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
559         if (!useRVVForFixedLengthVectorVT(VT))
560           continue;
561 
562         // By default everything must be expanded.
563         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
564           setOperationAction(Op, VT, Expand);
565         for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
566           setTruncStoreAction(VT, OtherVT, Expand);
567 
568         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
569         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
570         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
571 
572         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
573         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
574 
575         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
576 
577         setOperationAction(ISD::LOAD, VT, Custom);
578         setOperationAction(ISD::STORE, VT, Custom);
579 
580         setOperationAction(ISD::SETCC, VT, Custom);
581 
582         setOperationAction(ISD::TRUNCATE, VT, Custom);
583 
584         setOperationAction(ISD::BITCAST, VT, Custom);
585 
586         // Operations below differ between masks and other vectors.
587         if (VT.getVectorElementType() == MVT::i1) {
588           setOperationAction(ISD::AND, VT, Custom);
589           setOperationAction(ISD::OR, VT, Custom);
590           setOperationAction(ISD::XOR, VT, Custom);
591           continue;
592         }
593 
594         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
595         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
596 
597         setOperationAction(ISD::MLOAD, VT, Custom);
598         setOperationAction(ISD::MSTORE, VT, Custom);
599         setOperationAction(ISD::MGATHER, VT, Custom);
600         setOperationAction(ISD::MSCATTER, VT, Custom);
601         setOperationAction(ISD::ADD, VT, Custom);
602         setOperationAction(ISD::MUL, VT, Custom);
603         setOperationAction(ISD::SUB, VT, Custom);
604         setOperationAction(ISD::AND, VT, Custom);
605         setOperationAction(ISD::OR, VT, Custom);
606         setOperationAction(ISD::XOR, VT, Custom);
607         setOperationAction(ISD::SDIV, VT, Custom);
608         setOperationAction(ISD::SREM, VT, Custom);
609         setOperationAction(ISD::UDIV, VT, Custom);
610         setOperationAction(ISD::UREM, VT, Custom);
611         setOperationAction(ISD::SHL, VT, Custom);
612         setOperationAction(ISD::SRA, VT, Custom);
613         setOperationAction(ISD::SRL, VT, Custom);
614 
615         setOperationAction(ISD::SMIN, VT, Custom);
616         setOperationAction(ISD::SMAX, VT, Custom);
617         setOperationAction(ISD::UMIN, VT, Custom);
618         setOperationAction(ISD::UMAX, VT, Custom);
619         setOperationAction(ISD::ABS,  VT, Custom);
620 
621         setOperationAction(ISD::MULHS, VT, Custom);
622         setOperationAction(ISD::MULHU, VT, Custom);
623 
624         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
625         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
626         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
627         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
628 
629         setOperationAction(ISD::VSELECT, VT, Custom);
630 
631         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
632         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
633         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
634 
635         // Custom-lower reduction operations to set up the corresponding custom
636         // nodes' operands.
637         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
638         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
639         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
640         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
641         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
642         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
643         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
644         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
645       }
646 
647       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
648         if (!useRVVForFixedLengthVectorVT(VT))
649           continue;
650 
651         // By default everything must be expanded.
652         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
653           setOperationAction(Op, VT, Expand);
654         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
655           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
656           setTruncStoreAction(VT, OtherVT, Expand);
657         }
658 
659         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
660         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
661         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
662 
663         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
664         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
665         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
666         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
667 
668         setOperationAction(ISD::LOAD, VT, Custom);
669         setOperationAction(ISD::STORE, VT, Custom);
670         setOperationAction(ISD::MLOAD, VT, Custom);
671         setOperationAction(ISD::MSTORE, VT, Custom);
672         setOperationAction(ISD::MGATHER, VT, Custom);
673         setOperationAction(ISD::MSCATTER, VT, Custom);
674         setOperationAction(ISD::FADD, VT, Custom);
675         setOperationAction(ISD::FSUB, VT, Custom);
676         setOperationAction(ISD::FMUL, VT, Custom);
677         setOperationAction(ISD::FDIV, VT, Custom);
678         setOperationAction(ISD::FNEG, VT, Custom);
679         setOperationAction(ISD::FABS, VT, Custom);
680         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
681         setOperationAction(ISD::FSQRT, VT, Custom);
682         setOperationAction(ISD::FMA, VT, Custom);
683 
684         setOperationAction(ISD::FP_ROUND, VT, Custom);
685         setOperationAction(ISD::FP_EXTEND, VT, Custom);
686 
687         for (auto CC : VFPCCToExpand)
688           setCondCodeAction(CC, VT, Expand);
689 
690         setOperationAction(ISD::VSELECT, VT, Custom);
691 
692         setOperationAction(ISD::BITCAST, VT, Custom);
693 
694         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
695         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
696       }
697     }
698   }
699 
700   // Function alignments.
       // 2 bytes when the C extension is available, otherwise 4; the same value
       // is used as both the minimum and the preferred alignment.
701   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
702   setMinFunctionAlignment(FunctionAlignment);
703   setPrefFunctionAlignment(FunctionAlignment);
704 
705   setMinimumJumpTableEntries(5);
706 
707   // Jumps are expensive, compared to logic
708   setJumpIsExpensive();
709 
710   // We can use any register for comparisons
711   setHasMultipleConditionRegisters();
712 
       // Register the generic opcodes for which target DAG combines are requested:
       // OR with Zbp; FCOPYSIGN, MGATHER and MSCATTER with V.
713   if (Subtarget.hasStdExtZbp()) {
714     setTargetDAGCombine(ISD::OR);
715   }
716   if (Subtarget.hasStdExtV()) {
717     setTargetDAGCombine(ISD::FCOPYSIGN);
718     setTargetDAGCombine(ISD::MGATHER);
719     setTargetDAGCombine(ISD::MSCATTER);
720   }
721 }
722 
/// Return the value type produced by a SETCC whose operands have type \p VT.
///
///   - Non-vector operands: the pointer type for \p DL.
///   - Vector operands when the V extension is available and \p VT is either
///     scalable or fixed-length vectors are lowered with RVV: a vector of i1
///     with the same element count as \p VT.
///   - Any other vector: \p VT with its element type changed to integer.
723 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
724                                             LLVMContext &Context,
725                                             EVT VT) const {
726   const bool IsVector = VT.isVector();
727   if (IsVector && Subtarget.hasStdExtV() &&
728       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
729     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
730   return IsVector ? VT.changeVectorElementTypeToInteger()
731                   : EVT(getPointerTy(DL));
732 }
733 
734 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
735                                              const CallInst &I,
736                                              MachineFunction &MF,
737                                              unsigned Intrinsic) const {
738   switch (Intrinsic) {
739   default:
740     return false;
741   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
742   case Intrinsic::riscv_masked_atomicrmw_add_i32:
743   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
744   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
745   case Intrinsic::riscv_masked_atomicrmw_max_i32:
746   case Intrinsic::riscv_masked_atomicrmw_min_i32:
747   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
748   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
749   case Intrinsic::riscv_masked_cmpxchg_i32:
750     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
751     Info.opc = ISD::INTRINSIC_W_CHAIN;
752     Info.memVT = MVT::getVT(PtrTy->getElementType());
753     Info.ptrVal = I.getArgOperand(0);
754     Info.offset = 0;
755     Info.align = Align(4);
756     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
757                  MachineMemOperand::MOVolatile;
758     return true;
759   }
760 }
761 
762 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
763                                                 const AddrMode &AM, Type *Ty,
764                                                 unsigned AS,
765                                                 Instruction *I) const {
766   // No global is ever allowed as a base.
767   if (AM.BaseGV)
768     return false;
769 
770   // Require a 12-bit signed offset.
771   if (!isInt<12>(AM.BaseOffs))
772     return false;
773 
774   switch (AM.Scale) {
775   case 0: // "r+i" or just "i", depending on HasBaseReg.
776     break;
777   case 1:
778     if (!AM.HasBaseReg) // allow "r+i".
779       break;
780     return false; // disallow "r+r" or "r+r+i".
781   default:
782     return false;
783   }
784 
785   return true;
786 }
787 
788 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
789   return isInt<12>(Imm);
790 }
791 
792 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
793   return isInt<12>(Imm);
794 }
795 
796 // On RV32, 64-bit integers are split into their high and low parts and held
797 // in two different registers, so the trunc is free since the low register can
798 // just be used.
799 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
800   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
801     return false;
802   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
803   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
804   return (SrcBits == 64 && DestBits == 32);
805 }
806 
807 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
808   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
809       !SrcVT.isInteger() || !DstVT.isInteger())
810     return false;
811   unsigned SrcBits = SrcVT.getSizeInBits();
812   unsigned DestBits = DstVT.getSizeInBits();
813   return (SrcBits == 64 && DestBits == 32);
814 }
815 
816 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
817   // Zexts are free if they can be combined with a load.
818   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
819     EVT MemVT = LD->getMemoryVT();
820     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
821          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
822         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
823          LD->getExtensionType() == ISD::ZEXTLOAD))
824       return true;
825   }
826 
827   return TargetLowering::isZExtFree(Val, VT2);
828 }
829 
830 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
831   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
832 }
833 
834 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
835   return Subtarget.hasStdExtZbb();
836 }
837 
838 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
839   return Subtarget.hasStdExtZbb();
840 }
841 
842 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
843                                        bool ForCodeSize) const {
844   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
845     return false;
846   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
847     return false;
848   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
849     return false;
850   if (Imm.isNegZero())
851     return false;
852   return Imm.isZero();
853 }
854 
855 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
856   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
857          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
858          (VT == MVT::f64 && Subtarget.hasStdExtD());
859 }
860 
861 // Changes the condition code and swaps operands if necessary, so the SetCC
862 // operation matches one of the comparisons supported directly by branches
863 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
864 // with 1/-1.
865 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
866                                     ISD::CondCode &CC, SelectionDAG &DAG) {
867   // Convert X > -1 to X >= 0.
868   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
869     RHS = DAG.getConstant(0, DL, RHS.getValueType());
870     CC = ISD::SETGE;
871     return;
872   }
873   // Convert X < 1 to 0 >= X.
874   if (CC == ISD::SETLT && isOneConstant(RHS)) {
875     RHS = LHS;
876     LHS = DAG.getConstant(0, DL, RHS.getValueType());
877     CC = ISD::SETGE;
878     return;
879   }
880 
881   switch (CC) {
882   default:
883     break;
884   case ISD::SETGT:
885   case ISD::SETLE:
886   case ISD::SETUGT:
887   case ISD::SETULE:
888     CC = ISD::getSetCCSwappedOperands(CC);
889     std::swap(LHS, RHS);
890     break;
891   }
892 }
893 
894 // Return the RISC-V branch opcode that matches the given DAG integer
895 // condition code. The CondCode must be one of those supported by the RISC-V
896 // ISA (see translateSetCCForBranch).
897 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
898   switch (CC) {
899   default:
900     llvm_unreachable("Unsupported CondCode");
901   case ISD::SETEQ:
902     return RISCV::BEQ;
903   case ISD::SETNE:
904     return RISCV::BNE;
905   case ISD::SETLT:
906     return RISCV::BLT;
907   case ISD::SETGE:
908     return RISCV::BGE;
909   case ISD::SETULT:
910     return RISCV::BLTU;
911   case ISD::SETUGE:
912     return RISCV::BGEU;
913   }
914 }
915 
916 RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
917   assert(VT.isScalableVector() && "Expecting a scalable vector type");
918   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
919   if (VT.getVectorElementType() == MVT::i1)
920     KnownSize *= 8;
921 
922   switch (KnownSize) {
923   default:
924     llvm_unreachable("Invalid LMUL.");
925   case 8:
926     return RISCVVLMUL::LMUL_F8;
927   case 16:
928     return RISCVVLMUL::LMUL_F4;
929   case 32:
930     return RISCVVLMUL::LMUL_F2;
931   case 64:
932     return RISCVVLMUL::LMUL_1;
933   case 128:
934     return RISCVVLMUL::LMUL_2;
935   case 256:
936     return RISCVVLMUL::LMUL_4;
937   case 512:
938     return RISCVVLMUL::LMUL_8;
939   }
940 }
941 
942 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
943   switch (LMul) {
944   default:
945     llvm_unreachable("Invalid LMUL.");
946   case RISCVVLMUL::LMUL_F8:
947   case RISCVVLMUL::LMUL_F4:
948   case RISCVVLMUL::LMUL_F2:
949   case RISCVVLMUL::LMUL_1:
950     return RISCV::VRRegClassID;
951   case RISCVVLMUL::LMUL_2:
952     return RISCV::VRM2RegClassID;
953   case RISCVVLMUL::LMUL_4:
954     return RISCV::VRM4RegClassID;
955   case RISCVVLMUL::LMUL_8:
956     return RISCV::VRM8RegClassID;
957   }
958 }
959 
960 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
961   RISCVVLMUL LMUL = getLMUL(VT);
962   if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
963       LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
964     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
965                   "Unexpected subreg numbering");
966     return RISCV::sub_vrm1_0 + Index;
967   }
968   if (LMUL == RISCVVLMUL::LMUL_2) {
969     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
970                   "Unexpected subreg numbering");
971     return RISCV::sub_vrm2_0 + Index;
972   }
973   if (LMUL == RISCVVLMUL::LMUL_4) {
974     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
975                   "Unexpected subreg numbering");
976     return RISCV::sub_vrm4_0 + Index;
977   }
978   llvm_unreachable("Invalid vector type.");
979 }
980 
981 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
982   if (VT.getVectorElementType() == MVT::i1)
983     return RISCV::VRRegClassID;
984   return getRegClassIDForLMUL(getLMUL(VT));
985 }
986 
987 // Attempt to decompose a subvector insert/extract between VecVT and
988 // SubVecVT via subregister indices. Returns the subregister index that
989 // can perform the subvector insert/extract with the given element index, as
990 // well as the index corresponding to any leftover subvectors that must be
991 // further inserted/extracted within the register class for SubVecVT.
992 std::pair<unsigned, unsigned>
993 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
994     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
995     const RISCVRegisterInfo *TRI) {
996   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
997                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
998                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
999                 "Register classes not ordered");
1000   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1001   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1002   // Try to compose a subregister index that takes us from the incoming
1003   // LMUL>1 register class down to the outgoing one. At each step we half
1004   // the LMUL:
1005   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1006   // Note that this is not guaranteed to find a subregister index, such as
1007   // when we are extracting from one VR type to another.
1008   unsigned SubRegIdx = RISCV::NoSubRegister;
1009   for (const unsigned RCID :
1010        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1011     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1012       VecVT = VecVT.getHalfNumVectorElementsVT();
1013       bool IsHi =
1014           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1015       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1016                                             getSubregIndexByMVT(VecVT, IsHi));
1017       if (IsHi)
1018         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1019     }
1020   return {SubRegIdx, InsertExtractIdx};
1021 }
1022 
1023 // Return the largest legal scalable vector type that matches VT's element type.
1024 MVT RISCVTargetLowering::getContainerForFixedLengthVector(
1025     const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
1026   assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
1027          "Expected legal fixed length vector!");
1028 
1029   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
1030   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
1031 
1032   MVT EltVT = VT.getVectorElementType();
1033   switch (EltVT.SimpleTy) {
1034   default:
1035     llvm_unreachable("unexpected element type for RVV container");
1036   case MVT::i1: {
1037     // Masks are calculated assuming 8-bit elements since that's when we need
1038     // the most elements.
1039     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
1040     return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
1041   }
1042   case MVT::i8:
1043   case MVT::i16:
1044   case MVT::i32:
1045   case MVT::i64:
1046   case MVT::f16:
1047   case MVT::f32:
1048   case MVT::f64: {
1049     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
1050     return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
1051   }
1052   }
1053 }
1054 
1055 MVT RISCVTargetLowering::getContainerForFixedLengthVector(
1056     SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
1057   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1058                                           Subtarget);
1059 }
1060 
1061 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1062   return getContainerForFixedLengthVector(*this, VT, getSubtarget());
1063 }
1064 
1065 // Grow V to consume an entire RVV register.
1066 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1067                                        const RISCVSubtarget &Subtarget) {
1068   assert(VT.isScalableVector() &&
1069          "Expected to convert into a scalable vector!");
1070   assert(V.getValueType().isFixedLengthVector() &&
1071          "Expected a fixed length vector operand!");
1072   SDLoc DL(V);
1073   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1074   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1075 }
1076 
1077 // Shrink V so it's just big enough to maintain a VT's worth of data.
1078 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1079                                          const RISCVSubtarget &Subtarget) {
1080   assert(VT.isFixedLengthVector() &&
1081          "Expected to convert into a fixed length vector!");
1082   assert(V.getValueType().isScalableVector() &&
1083          "Expected a scalable vector operand!");
1084   SDLoc DL(V);
1085   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1086   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1087 }
1088 
1089 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1090 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1091 // the vector type that it is contained in.
1092 static std::pair<SDValue, SDValue>
1093 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1094                 const RISCVSubtarget &Subtarget) {
1095   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1096   MVT XLenVT = Subtarget.getXLenVT();
1097   SDValue VL = VecVT.isFixedLengthVector()
1098                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1099                    : DAG.getRegister(RISCV::X0, XLenVT);
1100   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1101   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1102   return {Mask, VL};
1103 }
1104 
1105 // As above but assuming the given type is a scalable vector type.
1106 static std::pair<SDValue, SDValue>
1107 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1108                         const RISCVSubtarget &Subtarget) {
1109   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1110   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1111 }
1112 
1113 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1114 // of either is (currently) supported. This can get us into an infinite loop
1115 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1116 // as a ..., etc.
1117 // Until either (or both) of these can reliably lower any node, reporting that
1118 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1119 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1120 // which is not desirable.
1121 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1122     EVT VT, unsigned DefinedValues) const {
1123   return false;
1124 }
1125 
1126 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1127   // Only splats are currently supported.
1128   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1129     return true;
1130 
1131   return false;
1132 }
1133 
// Custom-lower a fixed-length BUILD_VECTOR. The strategies below are tried in
// order and the first one that applies produces the result:
//   1. i1 (mask) vectors: all-zeros / all-ones become VMCLR_VL / VMSET_VL;
//      other constant masks are packed into integer chunks and bitcast.
//      Mask vectors never fall through to the later strategies.
//   2. Splats become VMV_V_X_VL (integer) or VFMV_V_F_VL (floating point).
//   3. Integer index sequences (0, 1, 2, ...) become VID_VL.
//   4. Constant integer vectors made of a short repeated sequence are
//      splatted through a wider integer element type and bitcast back.
//   5. Vectors with a "dominant" value are built from a splat of that value
//      plus INSERT_VECTOR_ELT / VSELECT fix-ups (not when optimizing for
//      size).
// Returns an empty SDValue when none of the strategies applies.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  // RVV nodes operate on the scalable container type; results are converted
  // back to VT with convertFromScalableVector.
  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  unsigned NumElts = Op.getNumOperands();

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // bits and XLEN.
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    // If we have to use more than one INSERT_VECTOR_ELT then this optimization
    // is likely to increase code size; avoid performing it in such a case.
    unsigned NumViaIntegerBits =
        std::min(std::max(NumElts, 8u), Subtarget.getXLen());
    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
        (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      MVT IntegerViaVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                           divideCeil(NumElts, NumViaIntegerBits));

      // Bits accumulates the mask bits of the chunk currently being built,
      // BitPos is the next bit position within that chunk and IntegerEltIdx
      // is the index of the chunk within IntegerViaVecVT.
      uint64_t Bits = 0;
      unsigned BitPos = 0, IntegerEltIdx = 0;
      SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);

      for (unsigned I = 0; I < NumElts; I++, BitPos++) {
        // Once we accumulate enough bits to fill our scalar type, insert into
        // our vector and clear our accumulated data.
        if (I != 0 && I % NumViaIntegerBits == 0) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64(Bits, 32);
          SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
                            Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
          Bits = 0;
          BitPos = 0;
          IntegerEltIdx++;
        }
        SDValue V = Op.getOperand(I);
        // Undef mask elements are treated as 0.
        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);
      }

      // Insert the (remaining) scalar value into position in our integer
      // vector type.
      if (NumViaIntegerBits <= 32)
        Bits = SignExtend64(Bits, 32);
      SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
                        DAG.getConstant(IntegerEltIdx, DL, XLenVT));

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        // our final mask.
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
        Vec = DAG.getBitcast(MVT::v8i1, Vec);
        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                          DAG.getConstant(0, DL, XLenVT));
      } else {
        // Else we must have produced an integer type with the same size as the
        // mask type; bitcast for the final result.
        assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
        Vec = DAG.getBitcast(VT, Vec);
      }

      return Vec;
    }

    // Non-constant masks, and constant masks rejected above when optimizing
    // for size, are not handled here.
    return SDValue();
  }

  // A splat is a single move of the splatted scalar into every element.
  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    // Every element must be undef or the constant equal to its own index.
    for (unsigned I = 0; I < NumElts && IsVID; I++)
      IsVID &= Op.getOperand(I).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(I)) &&
                Op.getConstantOperandVal(I) == I);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could be instead splat as
  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  unsigned EltBitSize = VT.getScalarSizeInBits();
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
    // One element of ViaIntVT holds one whole repetition of the sequence.
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      // Undef sequence elements contribute zero bits.
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64(SplatValue, 32);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value. That
    // way we can use RVV instructions to splat.
    assert((ViaIntVT.bitsLE(XLenVT) ||
            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      SDValue ViaVL =
          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
      MVT ViaContainerVT =
          RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
                                                                Subtarget);
      SDValue Splat =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
      return DAG.getBitcast(VT, Splat);
    }
  }

  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
  // which constitute a large proportion of the elements. In such cases we can
  // splat a vector with the dominant element and make up the shortfall with
  // INSERT_VECTOR_ELTs.
  // Note that this includes vectors of 2 elements by association. The
  // upper-most element is the "dominant" one, allowing us to use a splat to
  // "insert" the upper element, and an insert of the lower element at position
  // 0, which improves codegen.
  SDValue DominantValue;
  unsigned MostCommonCount = 0;
  DenseMap<SDValue, unsigned> ValueCounts;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  // Count the occurrences of every defined operand and track the most common
  // one as we go.
  for (SDValue V : Op->op_values()) {
    if (V.isUndef())
      continue;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    // end.
    if (++Count >= MostCommonCount) {
      DominantValue = V;
      MostCommonCount = Count;
    }
  }

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  // The dominant value must cover all but at most one of the defined elements
  // (with no requirement at all for one or two defined elements).
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
  if (!DAG.shouldOptForSize() &&
      ((MostCommonCount > DominantValueCountThreshold) ||
       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
    // Start by splatting the most common element.
    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

    // Processed holds the values already blended into Vec, so that each
    // distinct value is handled only once.
    DenseSet<SDValue> Processed{DominantValue};
    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
        continue;
      if (ValueCounts[V] == 1) {
        // A value that occurs once is inserted directly at its index.
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
      } else {
        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
        // we're after.
        SmallVector<SDValue> Ops;
        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
        });
        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
                          DAG.getBuildVector(SelMaskTy, DL, Ops),
                          DAG.getSplatBuildVector(VT, DL, V), Vec);
      }
    }

    return Vec;
  }

  return SDValue();
}
1381 
1382 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
1383                                    const RISCVSubtarget &Subtarget) {
1384   SDValue V1 = Op.getOperand(0);
1385   SDValue V2 = Op.getOperand(1);
1386   SDLoc DL(Op);
1387   MVT XLenVT = Subtarget.getXLenVT();
1388   MVT VT = Op.getSimpleValueType();
1389   unsigned NumElts = VT.getVectorNumElements();
1390   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1391 
1392   if (SVN->isSplat()) {
1393     int Lane = SVN->getSplatIndex();
1394     if (Lane >= 0) {
1395       MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
1396           DAG, VT, Subtarget);
1397 
1398       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
1399       assert(Lane < (int)NumElts && "Unexpected lane!");
1400 
1401       SDValue Mask, VL;
1402       std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1403       SDValue Gather =
1404           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
1405                       DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
1406       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1407     }
1408   }
1409 
1410   // Detect shuffles which can be re-expressed as vector selects.
1411   SmallVector<SDValue> MaskVals;
1412   // By default we preserve the original operand order, and select LHS as true
1413   // and RHS as false. However, since RVV vector selects may feature splats but
1414   // only on the LHS, we may choose to invert our mask and instead select
1415   // between RHS and LHS.
1416   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
1417 
1418   bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
1419     int MaskIndex = MaskIdx.value();
1420     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
1421     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
1422     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
1423   });
1424 
1425   if (IsSelect) {
1426     assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
1427     MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
1428     SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
1429     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1,
1430                        SwapOps ? V1 : V2);
1431   }
1432 
1433   return SDValue();
1434 }
1435 
1436 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
1437                                      SDLoc DL, SelectionDAG &DAG,
1438                                      const RISCVSubtarget &Subtarget) {
1439   if (VT.isScalableVector())
1440     return DAG.getFPExtendOrRound(Op, DL, VT);
1441   assert(VT.isFixedLengthVector() &&
1442          "Unexpected value type for RVV FP extend/round lowering");
1443   SDValue Mask, VL;
1444   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1445   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
1446                         ? RISCVISD::FP_EXTEND_VL
1447                         : RISCVISD::FP_ROUND_VL;
1448   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1449 }
1450 
/// Custom-lower \p Op by dispatching on its opcode.
///
/// Most cases forward to a dedicated lower* helper; a handful build the
/// replacement node(s) inline. Depending on the case the returned value is a
/// newly built node, \p Op itself, or an empty SDValue. Reaching this function
/// with an opcode that has no case below is a fatal error.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    // Only i16->f16 (with Zfh) and i32->f32 (RV64 with F) are rewritten below;
    // the integer source is any-extended before the FMV node is applied. Every
    // other combination returns an empty SDValue.
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    // Note: despite its name, ShAmtWidth holds the mask value XLen - 1.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    // Fixed-length sources are first moved into their scalable container type.
    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    // Halve the element width once per iteration, keeping the element count,
    // until the destination element type is reached.
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    // Extensions from i1 mask vectors take a separate path; ExtVal is 1 here
    // and -1 for SIGN_EXTEND below.
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    // The node's operand is a multiplier applied to vscale.
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size as the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    // The container is derived from the (wider) result type; the source
    // container shares its element count but keeps the source element type.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    // Everything other than a vector f16->f64 extend is a single hop.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    // Two hops: f16 -> f32 -> f64.
    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size as the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    // Here the container is derived from the (wider) source type; the result
    // container shares its element count but uses the result element type.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    // Everything other than a vector f64->f16 round is a single hop.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    // Two hops: f64 -> f32 using VFNCVT_ROD_VL, then f32 -> f16.
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
                                          DL, DAG, Subtarget);

    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Round, DAG, Subtarget);
    return Round;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    // An integer source element type means this is [SU]INT_TO_FP.
    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions and halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::FP_TO_SINT_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::FP_TO_UINT_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT, SrcContainerVT;
    // Derive the reference container type from the larger vector type.
    if (SrcEltSize > EltSize) {
      SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    } else {
      ContainerVT = getContainerForFixedLengthVector(VT);
      SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
    return convertFromScalableVector(VT, Src, DAG, Subtarget);
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::STEP_VECTOR:
    return lowerSTEP_VECTOR(Op, DAG);
  case ISD::VECTOR_REVERSE:
    return lowerVECTOR_REVERSE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::CONCAT_VECTORS: {
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // The element count of operand 0 is used as the stride for every operand.
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    // Operand i is inserted at element index i * NumOpElts.
    for (const auto &OpIdx : enumerate(Op->ops()))
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    return Vec;
  }
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::MLOAD:
    return lowerMLOAD(Op, DAG);
  case ISD::MSTORE:
    return lowerMSTORE(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  // The remaining arithmetic/logic cases forward to a helper together with the
  // VL-suffixed RISCVISD opcode(s) that helper should emit.
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::ABS:
    return lowerABS(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::MGATHER:
    return lowerMGATHER(Op, DAG);
  case ISD::MSCATTER:
    return lowerMSCATTER(Op, DAG);
  }
}
1922 
1923 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1924                              SelectionDAG &DAG, unsigned Flags) {
1925   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1926 }
1927 
1928 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1929                              SelectionDAG &DAG, unsigned Flags) {
1930   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1931                                    Flags);
1932 }
1933 
1934 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1935                              SelectionDAG &DAG, unsigned Flags) {
1936   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1937                                    N->getOffset(), Flags);
1938 }
1939 
1940 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1941                              SelectionDAG &DAG, unsigned Flags) {
1942   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1943 }
1944 
1945 template <class NodeTy>
1946 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1947                                      bool IsLocal) const {
1948   SDLoc DL(N);
1949   EVT Ty = getPointerTy(DAG.getDataLayout());
1950 
1951   if (isPositionIndependent()) {
1952     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1953     if (IsLocal)
1954       // Use PC-relative addressing to access the symbol. This generates the
1955       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
1956       // %pcrel_lo(auipc)).
1957       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1958 
1959     // Use PC-relative addressing to access the GOT for this symbol, then load
1960     // the address from the GOT. This generates the pattern (PseudoLA sym),
1961     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
1962     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
1963   }
1964 
1965   switch (getTargetMachine().getCodeModel()) {
1966   default:
1967     report_fatal_error("Unsupported code model for lowering");
1968   case CodeModel::Small: {
1969     // Generate a sequence for accessing addresses within the first 2 GiB of
1970     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
1971     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
1972     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
1973     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1974     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
1975   }
1976   case CodeModel::Medium: {
1977     // Generate a sequence for accessing addresses within any 2GiB range within
1978     // the address space. This generates the pattern (PseudoLLA sym), which
1979     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
1980     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1981     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1982   }
1983   }
1984 }
1985 
1986 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1987                                                 SelectionDAG &DAG) const {
1988   SDLoc DL(Op);
1989   EVT Ty = Op.getValueType();
1990   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1991   int64_t Offset = N->getOffset();
1992   MVT XLenVT = Subtarget.getXLenVT();
1993 
1994   const GlobalValue *GV = N->getGlobal();
1995   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1996   SDValue Addr = getAddr(N, DAG, IsLocal);
1997 
1998   // In order to maximise the opportunity for common subexpression elimination,
1999   // emit a separate ADD node for the global address offset instead of folding
2000   // it in the global address node. Later peephole optimisations may choose to
2001   // fold it back in when profitable.
2002   if (Offset != 0)
2003     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2004                        DAG.getConstant(Offset, DL, XLenVT));
2005   return Addr;
2006 }
2007 
2008 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2009                                                SelectionDAG &DAG) const {
2010   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2011 
2012   return getAddr(N, DAG);
2013 }
2014 
2015 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2016                                                SelectionDAG &DAG) const {
2017   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2018 
2019   return getAddr(N, DAG);
2020 }
2021 
2022 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2023                                             SelectionDAG &DAG) const {
2024   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2025 
2026   return getAddr(N, DAG);
2027 }
2028 
2029 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2030                                               SelectionDAG &DAG,
2031                                               bool UseGOT) const {
2032   SDLoc DL(N);
2033   EVT Ty = getPointerTy(DAG.getDataLayout());
2034   const GlobalValue *GV = N->getGlobal();
2035   MVT XLenVT = Subtarget.getXLenVT();
2036 
2037   if (UseGOT) {
2038     // Use PC-relative addressing to access the GOT for this TLS symbol, then
2039     // load the address from the GOT and add the thread pointer. This generates
2040     // the pattern (PseudoLA_TLS_IE sym), which expands to
2041     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
2042     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2043     SDValue Load =
2044         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
2045 
2046     // Add the thread pointer.
2047     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2048     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
2049   }
2050 
2051   // Generate a sequence for accessing the address relative to the thread
2052   // pointer, with the appropriate adjustment for the thread pointer offset.
2053   // This generates the pattern
2054   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
2055   SDValue AddrHi =
2056       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
2057   SDValue AddrAdd =
2058       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
2059   SDValue AddrLo =
2060       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
2061 
2062   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2063   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2064   SDValue MNAdd = SDValue(
2065       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
2066       0);
2067   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
2068 }
2069 
2070 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2071                                                SelectionDAG &DAG) const {
2072   SDLoc DL(N);
2073   EVT Ty = getPointerTy(DAG.getDataLayout());
2074   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2075   const GlobalValue *GV = N->getGlobal();
2076 
2077   // Use a PC-relative addressing mode to access the global dynamic GOT address.
2078   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
2079   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
2080   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2081   SDValue Load =
2082       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
2083 
2084   // Prepare argument list to generate call.
2085   ArgListTy Args;
2086   ArgListEntry Entry;
2087   Entry.Node = Load;
2088   Entry.Ty = CallTy;
2089   Args.push_back(Entry);
2090 
2091   // Setup call to __tls_get_addr.
2092   TargetLowering::CallLoweringInfo CLI(DAG);
2093   CLI.setDebugLoc(DL)
2094       .setChain(DAG.getEntryNode())
2095       .setLibCallee(CallingConv::C, CallTy,
2096                     DAG.getExternalSymbol("__tls_get_addr", Ty),
2097                     std::move(Args));
2098 
2099   return LowerCallTo(CLI).first;
2100 }
2101 
2102 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2103                                                    SelectionDAG &DAG) const {
2104   SDLoc DL(Op);
2105   EVT Ty = Op.getValueType();
2106   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2107   int64_t Offset = N->getOffset();
2108   MVT XLenVT = Subtarget.getXLenVT();
2109 
2110   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
2111 
2112   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2113       CallingConv::GHC)
2114     report_fatal_error("In GHC calling convention TLS is not supported");
2115 
2116   SDValue Addr;
2117   switch (Model) {
2118   case TLSModel::LocalExec:
2119     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2120     break;
2121   case TLSModel::InitialExec:
2122     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2123     break;
2124   case TLSModel::LocalDynamic:
2125   case TLSModel::GeneralDynamic:
2126     Addr = getDynamicTLSAddr(N, DAG);
2127     break;
2128   }
2129 
2130   // In order to maximise the opportunity for common subexpression elimination,
2131   // emit a separate ADD node for the global address offset instead of folding
2132   // it in the global address node. Later peephole optimisations may choose to
2133   // fold it back in when profitable.
2134   if (Offset != 0)
2135     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2136                        DAG.getConstant(Offset, DL, XLenVT));
2137   return Addr;
2138 }
2139 
2140 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2141   SDValue CondV = Op.getOperand(0);
2142   SDValue TrueV = Op.getOperand(1);
2143   SDValue FalseV = Op.getOperand(2);
2144   SDLoc DL(Op);
2145   MVT XLenVT = Subtarget.getXLenVT();
2146 
2147   // If the result type is XLenVT and CondV is the output of a SETCC node
2148   // which also operated on XLenVT inputs, then merge the SETCC node into the
2149   // lowered RISCVISD::SELECT_CC to take advantage of the integer
2150   // compare+branch instructions. i.e.:
2151   // (select (setcc lhs, rhs, cc), truev, falsev)
2152   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
2153   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
2154       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
2155     SDValue LHS = CondV.getOperand(0);
2156     SDValue RHS = CondV.getOperand(1);
2157     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
2158     ISD::CondCode CCVal = CC->get();
2159 
2160     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2161 
2162     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
2163     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
2164     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2165   }
2166 
2167   // Otherwise:
2168   // (select condv, truev, falsev)
2169   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
2170   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
2171   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
2172 
2173   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
2174 
2175   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2176 }
2177 
2178 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
2179   SDValue CondV = Op.getOperand(1);
2180   SDLoc DL(Op);
2181   MVT XLenVT = Subtarget.getXLenVT();
2182 
2183   if (CondV.getOpcode() == ISD::SETCC &&
2184       CondV.getOperand(0).getValueType() == XLenVT) {
2185     SDValue LHS = CondV.getOperand(0);
2186     SDValue RHS = CondV.getOperand(1);
2187     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
2188 
2189     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2190 
2191     SDValue TargetCC = DAG.getCondCode(CCVal);
2192     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2193                        LHS, RHS, TargetCC, Op.getOperand(2));
2194   }
2195 
2196   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2197                      CondV, DAG.getConstant(0, DL, XLenVT),
2198                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
2199 }
2200 
2201 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2202   MachineFunction &MF = DAG.getMachineFunction();
2203   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
2204 
2205   SDLoc DL(Op);
2206   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2207                                  getPointerTy(MF.getDataLayout()));
2208 
2209   // vastart just stores the address of the VarArgsFrameIndex slot into the
2210   // memory location argument.
2211   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2212   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2213                       MachinePointerInfo(SV));
2214 }
2215 
2216 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
2217                                             SelectionDAG &DAG) const {
2218   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2219   MachineFunction &MF = DAG.getMachineFunction();
2220   MachineFrameInfo &MFI = MF.getFrameInfo();
2221   MFI.setFrameAddressIsTaken(true);
2222   Register FrameReg = RI.getFrameRegister(MF);
2223   int XLenInBytes = Subtarget.getXLen() / 8;
2224 
2225   EVT VT = Op.getValueType();
2226   SDLoc DL(Op);
2227   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2228   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2229   while (Depth--) {
2230     int Offset = -(XLenInBytes * 2);
2231     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2232                               DAG.getIntPtrConstant(Offset, DL));
2233     FrameAddr =
2234         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2235   }
2236   return FrameAddr;
2237 }
2238 
2239 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
2240                                              SelectionDAG &DAG) const {
2241   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2242   MachineFunction &MF = DAG.getMachineFunction();
2243   MachineFrameInfo &MFI = MF.getFrameInfo();
2244   MFI.setReturnAddressIsTaken(true);
2245   MVT XLenVT = Subtarget.getXLenVT();
2246   int XLenInBytes = Subtarget.getXLen() / 8;
2247 
2248   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2249     return SDValue();
2250 
2251   EVT VT = Op.getValueType();
2252   SDLoc DL(Op);
2253   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2254   if (Depth) {
2255     int Off = -XLenInBytes;
2256     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
2257     SDValue Offset = DAG.getConstant(Off, DL, VT);
2258     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
2259                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
2260                        MachinePointerInfo());
2261   }
2262 
2263   // Return the value of the return address register, marking it an implicit
2264   // live-in.
2265   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
2266   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
2267 }
2268 
2269 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
2270                                                  SelectionDAG &DAG) const {
2271   SDLoc DL(Op);
2272   SDValue Lo = Op.getOperand(0);
2273   SDValue Hi = Op.getOperand(1);
2274   SDValue Shamt = Op.getOperand(2);
2275   EVT VT = Lo.getValueType();
2276 
2277   // if Shamt-XLEN < 0: // Shamt < XLEN
2278   //   Lo = Lo << Shamt
2279   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
2280   // else:
2281   //   Lo = 0
2282   //   Hi = Lo << (Shamt-XLEN)
2283 
2284   SDValue Zero = DAG.getConstant(0, DL, VT);
2285   SDValue One = DAG.getConstant(1, DL, VT);
2286   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2287   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2288   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2289   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2290 
2291   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2292   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2293   SDValue ShiftRightLo =
2294       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
2295   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2296   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2297   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
2298 
2299   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2300 
2301   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2302   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2303 
2304   SDValue Parts[2] = {Lo, Hi};
2305   return DAG.getMergeValues(Parts, DL);
2306 }
2307 
2308 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
2309                                                   bool IsSRA) const {
2310   SDLoc DL(Op);
2311   SDValue Lo = Op.getOperand(0);
2312   SDValue Hi = Op.getOperand(1);
2313   SDValue Shamt = Op.getOperand(2);
2314   EVT VT = Lo.getValueType();
2315 
2316   // SRA expansion:
2317   //   if Shamt-XLEN < 0: // Shamt < XLEN
2318   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2319   //     Hi = Hi >>s Shamt
2320   //   else:
2321   //     Lo = Hi >>s (Shamt-XLEN);
2322   //     Hi = Hi >>s (XLEN-1)
2323   //
2324   // SRL expansion:
2325   //   if Shamt-XLEN < 0: // Shamt < XLEN
2326   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2327   //     Hi = Hi >>u Shamt
2328   //   else:
2329   //     Lo = Hi >>u (Shamt-XLEN);
2330   //     Hi = 0;
2331 
2332   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2333 
2334   SDValue Zero = DAG.getConstant(0, DL, VT);
2335   SDValue One = DAG.getConstant(1, DL, VT);
2336   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2337   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2338   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2339   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2340 
2341   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2342   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2343   SDValue ShiftLeftHi =
2344       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
2345   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2346   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2347   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
2348   SDValue HiFalse =
2349       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
2350 
2351   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2352 
2353   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2354   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2355 
2356   SDValue Parts[2] = {Lo, Hi};
2357   return DAG.getMergeValues(Parts, DL);
2358 }
2359 
2360 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
2361 // illegal (currently only vXi64 RV32).
2362 // FIXME: We could also catch non-constant sign-extended i32 values and lower
2363 // them to SPLAT_VECTOR_I64
2364 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
2365                                                      SelectionDAG &DAG) const {
2366   SDLoc DL(Op);
2367   EVT VecVT = Op.getValueType();
2368   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
2369          "Unexpected SPLAT_VECTOR_PARTS lowering");
2370 
2371   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
2372   SDValue Lo = Op.getOperand(0);
2373   SDValue Hi = Op.getOperand(1);
2374 
2375   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2376     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2377     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2378     // If Hi constant is all the same sign bit as Lo, lower this as a custom
2379     // node in order to try and match RVV vector/scalar instructions.
2380     if ((LoC >> 31) == HiC)
2381       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2382   }
2383 
2384   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
2385   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
2386   // vmv.v.x vX, hi
2387   // vsll.vx vX, vX, /*32*/
2388   // vmv.v.x vY, lo
2389   // vsll.vx vY, vY, /*32*/
2390   // vsrl.vx vY, vY, /*32*/
2391   // vor.vv vX, vX, vY
2392   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
2393 
2394   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2395   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
2396   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
2397 
2398   if (isNullConstant(Hi))
2399     return Lo;
2400 
2401   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
2402   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
2403 
2404   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
2405 }
2406 
2407 // Custom-lower extensions from mask vectors by using a vselect either with 1
2408 // for zero/any-extension or -1 for sign-extension:
2409 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
2410 // Note that any-extension is lowered identically to zero-extension.
2411 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2412                                                 int64_t ExtTrueVal) const {
2413   SDLoc DL(Op);
2414   MVT VecVT = Op.getSimpleValueType();
2415   SDValue Src = Op.getOperand(0);
2416   // Only custom-lower extensions from mask types
2417   assert(Src.getValueType().isVector() &&
2418          Src.getValueType().getVectorElementType() == MVT::i1);
2419 
2420   MVT XLenVT = Subtarget.getXLenVT();
2421   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2422   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2423 
2424   if (VecVT.isScalableVector()) {
2425     // Be careful not to introduce illegal scalar types at this stage, and be
2426     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2427     // illegal and must be expanded. Since we know that the constants are
2428     // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
2429     bool IsRV32E64 =
2430         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2431 
2432     if (!IsRV32E64) {
2433       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2434       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2435     } else {
2436       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2437       SplatTrueVal =
2438           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2439     }
2440 
2441     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2442   }
2443 
2444   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2445   MVT I1ContainerVT =
2446       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2447 
2448   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2449 
2450   SDValue Mask, VL;
2451   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2452 
2453   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2454   SplatTrueVal =
2455       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2456   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2457                                SplatTrueVal, SplatZero, VL);
2458 
2459   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2460 }
2461 
2462 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2463     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2464   MVT ExtVT = Op.getSimpleValueType();
2465   // Only custom-lower extensions from fixed-length vector types.
2466   if (!ExtVT.isFixedLengthVector())
2467     return Op;
2468   MVT VT = Op.getOperand(0).getSimpleValueType();
2469   // Grab the canonical container type for the extended type. Infer the smaller
2470   // type from that to ensure the same number of vector elements, as we know
2471   // the LMUL will be sufficient to hold the smaller type.
2472   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2473   // Get the extended container type manually to ensure the same number of
2474   // vector elements between source and dest.
2475   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
2476                                      ContainerExtVT.getVectorElementCount());
2477 
2478   SDValue Op1 =
2479       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2480 
2481   SDLoc DL(Op);
2482   SDValue Mask, VL;
2483   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2484 
2485   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
2486 
2487   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
2488 }
2489 
2490 // Custom-lower truncations from vectors to mask vectors by using a mask and a
2491 // setcc operation:
2492 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
2493 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
2494                                                   SelectionDAG &DAG) const {
2495   SDLoc DL(Op);
2496   EVT MaskVT = Op.getValueType();
2497   // Only expect to custom-lower truncations to mask types
2498   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
2499          "Unexpected type for vector mask lowering");
2500   SDValue Src = Op.getOperand(0);
2501   MVT VecVT = Src.getSimpleValueType();
2502 
2503   // If this is a fixed vector, we need to convert it to a scalable vector.
2504   MVT ContainerVT = VecVT;
2505   if (VecVT.isFixedLengthVector()) {
2506     ContainerVT = getContainerForFixedLengthVector(VecVT);
2507     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2508   }
2509 
2510   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
2511   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2512 
2513   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
2514   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
2515 
2516   if (VecVT.isScalableVector()) {
2517     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
2518     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
2519   }
2520 
2521   SDValue Mask, VL;
2522   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2523 
2524   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2525   SDValue Trunc =
2526       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
2527   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
2528                       DAG.getCondCode(ISD::SETNE), Mask, VL);
2529   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
2530 }
2531 
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
//
// Operands of Op: 0 = vector to insert into, 1 = scalar value, 2 = index.
// Fixed-length vectors are processed in their scalable container type and
// converted back before returning.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // The scalar can be inserted directly unless it is an i64 on a non-64-bit
  // subtarget.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      // Re-materialise the constant as an i32.
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Vector holding the value to insert in element 0.
  SDValue ValInVec;

  if (IsLegalInsert) {
    // Inserting at index 0 needs no slide: write the scalar straight into the
    // source vector and return.
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    // Otherwise place the scalar in an otherwise-undefined vector.
    ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    // Split the i64 scalar into its two i32 halves (element 0 and element 1).
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    // i32 vector type with twice the element count of the container.
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // The slide uses a VL of Idx + 1 (see the function comment).
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
2619 
2620 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2621 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2622 // types this is done using VMV_X_S to allow us to glean information about the
2623 // sign bits of the result.
2624 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2625                                                      SelectionDAG &DAG) const {
2626   SDLoc DL(Op);
2627   SDValue Idx = Op.getOperand(1);
2628   SDValue Vec = Op.getOperand(0);
2629   EVT EltVT = Op.getValueType();
2630   MVT VecVT = Vec.getSimpleValueType();
2631   MVT XLenVT = Subtarget.getXLenVT();
2632 
2633   if (VecVT.getVectorElementType() == MVT::i1) {
2634     // FIXME: For now we just promote to an i8 vector and extract from that,
2635     // but this is probably not optimal.
2636     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
2637     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
2638     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
2639   }
2640 
2641   // If this is a fixed vector, we need to convert it to a scalable vector.
2642   MVT ContainerVT = VecVT;
2643   if (VecVT.isFixedLengthVector()) {
2644     ContainerVT = getContainerForFixedLengthVector(VecVT);
2645     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2646   }
2647 
2648   // If the index is 0, the vector is already in the right position.
2649   if (!isNullConstant(Idx)) {
2650     // Use a VL of 1 to avoid processing more elements than we need.
2651     SDValue VL = DAG.getConstant(1, DL, XLenVT);
2652     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2653     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2654     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2655                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2656   }
2657 
2658   if (!EltVT.isInteger()) {
2659     // Floating-point extracts are handled in TableGen.
2660     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2661                        DAG.getConstant(0, DL, XLenVT));
2662   }
2663 
2664   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2665   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2666 }
2667 
2668 // Called by type legalization to handle splat of i64 on RV32.
2669 // FIXME: We can optimize this when the type has sign or zero bits in one
2670 // of the halves.
2671 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2672                                    SDValue VL, SelectionDAG &DAG) {
2673   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
2674   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2675                            DAG.getConstant(0, DL, MVT::i32));
2676   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2677                            DAG.getConstant(1, DL, MVT::i32));
2678 
2679   // vmv.v.x vX, hi
2680   // vsll.vx vX, vX, /*32*/
2681   // vmv.v.x vY, lo
2682   // vsll.vx vY, vY, /*32*/
2683   // vsrl.vx vY, vY, /*32*/
2684   // vor.vv vX, vX, vY
2685   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
2686   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2687   Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2688   Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2689   Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2690 
2691   Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
2692   Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
2693 
2694   return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
2695 }
2696 
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
//
// This helper looks up the intrinsic in RISCVVIntrinsicsTable and, if it has a
// scalar "splat" operand whose type is not XLenVT, rebuilds the node with that
// operand rewritten:
//  * narrower than XLenVT: extended to XLenVT;
//  * i64 with XLenVT == i32: truncated to an i32 constant when it is a
//    sign-extended 32-bit constant, otherwise replaced by a vector splat built
//    with splatSplitI64WithVL.
// Returns an empty SDValue when no rewrite is needed (no V extension, unknown
// intrinsic, no splat operand, or the operand is already acceptable).
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  // For chained intrinsics the intrinsic ID is operand 1 rather than 0, and
  // all later operand indices are shifted by one.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  // Copy the operand list; ScalarOp aliases the entry that will be rewritten,
  // so assigning to it updates Operands in place.
  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
2772 
// Custom-lower chainless intrinsics. A handful of intrinsics (thread_pointer
// and several RVV move intrinsics) are mapped directly onto target nodes here;
// everything else falls through to lowerVectorIntrinsicSplats, which only
// legalizes a scalar splat operand if the intrinsic has one.
// Operand 0 of Op is the intrinsic ID; the intrinsic's own arguments start at
// operand 1.
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer is read directly from register X4.
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x: {
    // Operand 1 is the scalar to splat, operand 2 is the VL.
    SDValue Scalar = Op.getOperand(1);
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      // Constants are sign-extended, other values any-extended, to XLenVT.
      unsigned ExtOpc =
          isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
      Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
                         Op.getOperand(2));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // If this is a sign-extended 32-bit constant, we can truncate it and rely
    // on the instruction to sign-extend since SEW>XLEN.
    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
      if (isInt<32>(CVal->getSExtValue()))
        return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
                           DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
                           Op.getOperand(2));
    }

    // Otherwise use the more complicated splatting algorithm.
    return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
                               Op.getOperand(2), DAG);
  }
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    // Operand 1 is the source vector, operand 2 the scalar, operand 3 the VL.
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   vmv.v.x vX, hi
    //   vsll.vx vX, vX, /*32*/
    //   vmv.v.x vY, lo
    //   vsll.vx vY, vY, /*32*/
    //   vsrl.vx vY, vY, /*32*/
    //   vor.vv vX, vX, vY
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = Op.getOperand(3);

    // Splat of the full i64 value, and a splat of index 0 to compare against.
    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
    SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                      DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    // Select condition: element index == 0.
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
                    DAG.getCondCode(ISD::SETEQ), Mask, VL);
    // Take the splatted value where the condition holds, Vec elsewhere.
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  }

  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
2868 
// Lowers an intrinsic node that carries a chain. No intrinsic is special-cased
// here: Op is forwarded to lowerVectorIntrinsicSplats and whatever that helper
// returns is the result of this lowering.
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
2873 
2874 static MVT getLMUL1VT(MVT VT) {
2875   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
2876          "Unexpected vector MVT");
2877   return MVT::getScalableVectorVT(
2878       VT.getVectorElementType(),
2879       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
2880 }
2881 
2882 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
2883   switch (ISDOpcode) {
2884   default:
2885     llvm_unreachable("Unhandled reduction");
2886   case ISD::VECREDUCE_ADD:
2887     return RISCVISD::VECREDUCE_ADD_VL;
2888   case ISD::VECREDUCE_UMAX:
2889     return RISCVISD::VECREDUCE_UMAX_VL;
2890   case ISD::VECREDUCE_SMAX:
2891     return RISCVISD::VECREDUCE_SMAX_VL;
2892   case ISD::VECREDUCE_UMIN:
2893     return RISCVISD::VECREDUCE_UMIN_VL;
2894   case ISD::VECREDUCE_SMIN:
2895     return RISCVISD::VECREDUCE_SMIN_VL;
2896   case ISD::VECREDUCE_AND:
2897     return RISCVISD::VECREDUCE_AND_VL;
2898   case ISD::VECREDUCE_OR:
2899     return RISCVISD::VECREDUCE_OR_VL;
2900   case ISD::VECREDUCE_XOR:
2901     return RISCVISD::VECREDUCE_XOR_VL;
2902   }
2903 }
2904 
2905 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
2906                                             SelectionDAG &DAG) const {
2907   SDLoc DL(Op);
2908   SDValue Vec = Op.getOperand(0);
2909   EVT VecEVT = Vec.getValueType();
2910 
2911   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
2912 
2913   // Due to ordering in legalize types we may have a vector type that needs to
2914   // be split. Do that manually so we can get down to a legal type.
2915   while (getTypeAction(*DAG.getContext(), VecEVT) ==
2916          TargetLowering::TypeSplitVector) {
2917     SDValue Lo, Hi;
2918     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
2919     VecEVT = Lo.getValueType();
2920     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
2921   }
2922 
2923   // TODO: The type may need to be widened rather than split. Or widened before
2924   // it can be split.
2925   if (!isTypeLegal(VecEVT))
2926     return SDValue();
2927 
2928   MVT VecVT = VecEVT.getSimpleVT();
2929   MVT VecEltVT = VecVT.getVectorElementType();
2930   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
2931 
2932   MVT ContainerVT = VecVT;
2933   if (VecVT.isFixedLengthVector()) {
2934     ContainerVT = getContainerForFixedLengthVector(VecVT);
2935     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2936   }
2937 
2938   MVT M1VT = getLMUL1VT(ContainerVT);
2939 
2940   SDValue Mask, VL;
2941   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2942 
2943   // FIXME: This is a VLMAX splat which might be too large and can prevent
2944   // vsetvli removal.
2945   SDValue NeutralElem =
2946       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
2947   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
2948   SDValue Reduction =
2949       DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
2950   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2951                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2952   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
2953 }
2954 
2955 // Given a reduction op, this function returns the matching reduction opcode,
2956 // the vector SDValue and the scalar SDValue required to lower this to a
2957 // RISCVISD node.
2958 static std::tuple<unsigned, SDValue, SDValue>
2959 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
2960   SDLoc DL(Op);
2961   switch (Op.getOpcode()) {
2962   default:
2963     llvm_unreachable("Unhandled reduction");
2964   case ISD::VECREDUCE_FADD:
2965     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
2966                            DAG.getConstantFP(0.0, DL, EltVT));
2967   case ISD::VECREDUCE_SEQ_FADD:
2968     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
2969                            Op.getOperand(0));
2970   }
2971 }
2972 
2973 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
2974                                               SelectionDAG &DAG) const {
2975   SDLoc DL(Op);
2976   MVT VecEltVT = Op.getSimpleValueType();
2977 
2978   unsigned RVVOpcode;
2979   SDValue VectorVal, ScalarVal;
2980   std::tie(RVVOpcode, VectorVal, ScalarVal) =
2981       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
2982   MVT VecVT = VectorVal.getSimpleValueType();
2983 
2984   MVT ContainerVT = VecVT;
2985   if (VecVT.isFixedLengthVector()) {
2986     ContainerVT = getContainerForFixedLengthVector(VecVT);
2987     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
2988   }
2989 
2990   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
2991 
2992   SDValue Mask, VL;
2993   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2994 
2995   // FIXME: This is a VLMAX splat which might be too large and can prevent
2996   // vsetvli removal.
2997   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
2998   SDValue Reduction =
2999       DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
3000   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3001                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3002 }
3003 
// Lowers INSERT_SUBVECTOR, where operand 0 is the vector being inserted into,
// operand 1 is the subvector and operand 2 is the constant element index.
// Op is returned unchanged in the cases the comments below describe as
// handled by subregister manipulation; otherwise a replacement value of Op's
// type is built. Three strategies are tried in order:
//  * i1 (mask) vectors are bitcast to i8 vectors when both element counts are
//    at least 8; otherwise they are zero-extended to i8, inserted, and
//    compared against zero to get back to a mask.
//  * Fixed-length subvectors are slid up by the full index with VSLIDEUP_VL.
//  * Scalable subvectors are decomposed into a register-aligned part and a
//    remainder index; only the remainder needs a VSLIDEUP_VL.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  // An insert at index 0 into an undef vector needs no sliding, so it skips
  // this rewriting entirely.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Re-express everything in units of i8: eight mask bits per element.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // "Truncate" back to i1 by testing each i8 element against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // Inserting at index 0 into undef needs no slide; leave the node alone.
    if (OrigIdx == 0 && Vec.isUndef())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // Widen the subvector to the container type (placed at element 0 of an
    // otherwise undef vector) so it can be used as the slide source.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    // Only the mask half of the default (mask, VL) pair is used; the VL is
    // computed explicitly below.
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    // We might have bitcast from a mask type above: cast back if required.
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  // Split the index into a subregister index and the remaining element index
  // within that register. Only RemIdx is consulted in this function.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // A fractional LMUL means the subvector occupies only part of a register.
  RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVVLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  // Index of the first element of the register that contains the insertion.
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  // Only Mask is kept from this call; VL is overwritten just below.
  // NOTE(review): the defaults are requested for VecVT although the slide
  // operates on InterSubVT (lowerEXTRACT_SUBVECTOR passes InterSubVT at the
  // equivalent point) - confirm this is intended.
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  // Widen the subvector to the intermediate type (placed at element 0 of an
  // otherwise undef vector) so it can be used as the slide source.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
3153 
3154 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
3155                                                     SelectionDAG &DAG) const {
3156   SDValue Vec = Op.getOperand(0);
3157   MVT SubVecVT = Op.getSimpleValueType();
3158   MVT VecVT = Vec.getSimpleValueType();
3159 
3160   SDLoc DL(Op);
3161   MVT XLenVT = Subtarget.getXLenVT();
3162   unsigned OrigIdx = Op.getConstantOperandVal(1);
3163   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3164 
3165   // We don't have the ability to slide mask vectors down indexed by their i1
3166   // elements; the smallest we can do is i8. Often we are able to bitcast to
3167   // equivalent i8 vectors. Note that when extracting a fixed-length vector
3168   // from a scalable one, we might not necessarily have enough scalable
3169   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
3170   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
3171     if (VecVT.getVectorMinNumElements() >= 8 &&
3172         SubVecVT.getVectorMinNumElements() >= 8) {
3173       assert(OrigIdx % 8 == 0 && "Invalid index");
3174       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
3175              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
3176              "Unexpected mask vector lowering");
3177       OrigIdx /= 8;
3178       SubVecVT =
3179           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
3180                            SubVecVT.isScalableVector());
3181       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
3182                                VecVT.isScalableVector());
3183       Vec = DAG.getBitcast(VecVT, Vec);
3184     } else {
3185       // We can't slide this mask vector down, indexed by its i1 elements.
3186       // This poses a problem when we wish to extract a scalable vector which
3187       // can't be re-expressed as a larger type. Just choose the slow path and
3188       // extend to a larger type, then truncate back down.
3189       // TODO: We could probably improve this when extracting certain fixed
3190       // from fixed, where we can extract as i8 and shift the correct element
3191       // right to reach the desired subvector?
3192       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
3193       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
3194       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
3195       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
3196                         Op.getOperand(1));
3197       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
3198       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
3199     }
3200   }
3201 
3202   // If the subvector vector is a fixed-length type, we cannot use subregister
3203   // manipulation to simplify the codegen; we don't know which register of a
3204   // LMUL group contains the specific subvector as we only know the minimum
3205   // register size. Therefore we must slide the vector group down the full
3206   // amount.
3207   if (SubVecVT.isFixedLengthVector()) {
3208     // With an index of 0 this is a cast-like subvector, which can be performed
3209     // with subregister operations.
3210     if (OrigIdx == 0)
3211       return Op;
3212     MVT ContainerVT = VecVT;
3213     if (VecVT.isFixedLengthVector()) {
3214       ContainerVT = getContainerForFixedLengthVector(VecVT);
3215       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3216     }
3217     SDValue Mask =
3218         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
3219     // Set the vector length to only the number of elements we care about. This
3220     // avoids sliding down elements we're going to discard straight away.
3221     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
3222     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
3223     SDValue Slidedown =
3224         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3225                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
3226     // Now we can use a cast-like subvector extract to get the result.
3227     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3228                             DAG.getConstant(0, DL, XLenVT));
3229     return DAG.getBitcast(Op.getValueType(), Slidedown);
3230   }
3231 
3232   unsigned SubRegIdx, RemIdx;
3233   std::tie(SubRegIdx, RemIdx) =
3234       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3235           VecVT, SubVecVT, OrigIdx, TRI);
3236 
3237   // If the Idx has been completely eliminated then this is a subvector extract
3238   // which naturally aligns to a vector register. These can easily be handled
3239   // using subregister manipulation.
3240   if (RemIdx == 0)
3241     return Op;
3242 
3243   // Else we must shift our vector register directly to extract the subvector.
3244   // Do this using VSLIDEDOWN.
3245 
3246   // If the vector type is an LMUL-group type, extract a subvector equal to the
3247   // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
3248   // instruction.
3249   MVT InterSubVT = VecVT;
3250   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
3251     InterSubVT = getLMUL1VT(VecVT);
3252     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
3253                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
3254   }
3255 
3256   // Slide this vector register down by the desired number of elements in order
3257   // to place the desired subvector starting at element 0.
3258   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
3259   // For scalable vectors this must be further multiplied by vscale.
3260   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
3261 
3262   SDValue Mask, VL;
3263   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
3264   SDValue Slidedown =
3265       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
3266                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
3267 
3268   // Now the vector is in the right position, extract our final subvector. This
3269   // should resolve to a COPY.
3270   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3271                           DAG.getConstant(0, DL, XLenVT));
3272 
3273   // We might have bitcast from a mask type: cast back to the original type if
3274   // required.
3275   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
3276 }
3277 
3278 // Implement step_vector to the vid instruction.
3279 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
3280                                               SelectionDAG &DAG) const {
3281   SDLoc DL(Op);
3282   assert(Op.getConstantOperandAPInt(0) == 1 && "Unexpected step value");
3283   MVT VT = Op.getSimpleValueType();
3284   SDValue Mask, VL;
3285   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
3286   return DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3287 }
3288 
3289 // Implement vector_reverse using vrgather.vv with indices determined by
3290 // subtracting the id of each element from (VLMAX-1). This will convert
3291 // the indices like so:
3292 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
3293 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
3294 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
3295                                                  SelectionDAG &DAG) const {
3296   SDLoc DL(Op);
3297   MVT VecVT = Op.getSimpleValueType();
3298   unsigned EltSize = VecVT.getScalarSizeInBits();
3299   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3300 
3301   unsigned MaxVLMAX = 0;
3302   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
3303   if (VectorBitsMax != 0)
3304     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
3305 
3306   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
3307   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
3308 
3309   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
3310   // to use vrgatherei16.vv.
3311   // TODO: It's also possible to use vrgatherei16.vv for other types to
3312   // decrease register width for the index calculation.
3313   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
3314     // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
3315     // Reverse each half, then reassemble them in reverse order.
3316     // NOTE: It's also possible that after splitting that VLMAX no longer
3317     // requires vrgatherei16.vv.
3318     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
3319       SDValue Lo, Hi;
3320       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
3321       EVT LoVT, HiVT;
3322       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
3323       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
3324       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
3325       // Reassemble the low and high pieces reversed.
3326       // FIXME: This is a CONCAT_VECTORS.
3327       SDValue Res =
3328           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
3329                       DAG.getIntPtrConstant(0, DL));
3330       return DAG.getNode(
3331           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
3332           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
3333     }
3334 
3335     // Just promote the int type to i16 which will double the LMUL.
3336     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
3337     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
3338   }
3339 
3340   MVT XLenVT = Subtarget.getXLenVT();
3341   SDValue Mask, VL;
3342   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
3343 
3344   // Calculate VLMAX-1 for the desired SEW.
3345   unsigned MinElts = VecVT.getVectorMinNumElements();
3346   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
3347                               DAG.getConstant(MinElts, DL, XLenVT));
3348   SDValue VLMinus1 =
3349       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
3350 
3351   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
3352   bool IsRV32E64 =
3353       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
3354   SDValue SplatVL;
3355   if (!IsRV32E64)
3356     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
3357   else
3358     SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
3359 
3360   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
3361   SDValue Indices =
3362       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
3363 
3364   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
3365 }
3366 
3367 SDValue
3368 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
3369                                                      SelectionDAG &DAG) const {
3370   auto *Load = cast<LoadSDNode>(Op);
3371 
3372   SDLoc DL(Op);
3373   MVT VT = Op.getSimpleValueType();
3374   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3375 
3376   SDValue VL =
3377       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3378 
3379   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3380   SDValue NewLoad = DAG.getMemIntrinsicNode(
3381       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
3382       Load->getMemoryVT(), Load->getMemOperand());
3383 
3384   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
3385   return DAG.getMergeValues({Result, Load->getChain()}, DL);
3386 }
3387 
3388 SDValue
3389 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
3390                                                       SelectionDAG &DAG) const {
3391   auto *Store = cast<StoreSDNode>(Op);
3392 
3393   SDLoc DL(Op);
3394   MVT VT = Store->getValue().getSimpleValueType();
3395 
3396   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
3397   // This is tricky to do.
3398 
3399   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3400 
3401   SDValue VL =
3402       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3403 
3404   SDValue NewValue =
3405       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
3406   return DAG.getMemIntrinsicNode(
3407       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
3408       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
3409       Store->getMemoryVT(), Store->getMemOperand());
3410 }
3411 
3412 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
3413   auto *Load = cast<MaskedLoadSDNode>(Op);
3414 
3415   SDLoc DL(Op);
3416   MVT VT = Op.getSimpleValueType();
3417   MVT XLenVT = Subtarget.getXLenVT();
3418 
3419   SDValue Mask = Load->getMask();
3420   SDValue PassThru = Load->getPassThru();
3421   SDValue VL;
3422 
3423   MVT ContainerVT = VT;
3424   if (VT.isFixedLengthVector()) {
3425     ContainerVT = getContainerForFixedLengthVector(VT);
3426     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3427 
3428     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3429     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
3430     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3431   } else
3432     VL = DAG.getRegister(RISCV::X0, XLenVT);
3433 
3434   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3435   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
3436   SDValue Ops[] = {Load->getChain(),   IntID, PassThru,
3437                    Load->getBasePtr(), Mask,  VL};
3438   SDValue Result =
3439       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
3440                               Load->getMemoryVT(), Load->getMemOperand());
3441   SDValue Chain = Result.getValue(1);
3442 
3443   if (VT.isFixedLengthVector())
3444     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3445 
3446   return DAG.getMergeValues({Result, Chain}, DL);
3447 }
3448 
3449 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
3450   auto *Store = cast<MaskedStoreSDNode>(Op);
3451 
3452   SDLoc DL(Op);
3453   SDValue Val = Store->getValue();
3454   SDValue Mask = Store->getMask();
3455   MVT VT = Val.getSimpleValueType();
3456   MVT XLenVT = Subtarget.getXLenVT();
3457   SDValue VL;
3458 
3459   MVT ContainerVT = VT;
3460   if (VT.isFixedLengthVector()) {
3461     ContainerVT = getContainerForFixedLengthVector(VT);
3462     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3463 
3464     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
3465     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3466     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3467   } else
3468     VL = DAG.getRegister(RISCV::X0, XLenVT);
3469 
3470   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
3471   return DAG.getMemIntrinsicNode(
3472       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
3473       {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
3474       Store->getMemoryVT(), Store->getMemOperand());
3475 }
3476 
3477 SDValue
3478 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
3479                                                       SelectionDAG &DAG) const {
3480   MVT InVT = Op.getOperand(0).getSimpleValueType();
3481   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
3482 
3483   MVT VT = Op.getSimpleValueType();
3484 
3485   SDValue Op1 =
3486       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3487   SDValue Op2 =
3488       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3489 
3490   SDLoc DL(Op);
3491   SDValue VL =
3492       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3493 
3494   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3495   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3496 
3497   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
3498                             Op.getOperand(2), Mask, VL);
3499 
3500   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
3501 }
3502 
3503 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
3504     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
3505   MVT VT = Op.getSimpleValueType();
3506 
3507   if (VT.getVectorElementType() == MVT::i1)
3508     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
3509 
3510   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
3511 }
3512 
3513 // Lower vector ABS to smax(X, sub(0, X)).
3514 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
3515   SDLoc DL(Op);
3516   MVT VT = Op.getSimpleValueType();
3517   SDValue X = Op.getOperand(0);
3518 
3519   assert(VT.isFixedLengthVector() && "Unexpected type");
3520 
3521   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3522   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
3523 
3524   SDValue Mask, VL;
3525   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3526 
3527   SDValue SplatZero =
3528       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3529                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3530   SDValue NegX =
3531       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
3532   SDValue Max =
3533       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
3534 
3535   return convertFromScalableVector(VT, Max, DAG, Subtarget);
3536 }
3537 
3538 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
3539     SDValue Op, SelectionDAG &DAG) const {
3540   SDLoc DL(Op);
3541   MVT VT = Op.getSimpleValueType();
3542   SDValue Mag = Op.getOperand(0);
3543   SDValue Sign = Op.getOperand(1);
3544   assert(Mag.getValueType() == Sign.getValueType() &&
3545          "Can only handle COPYSIGN with matching types.");
3546 
3547   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3548   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
3549   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
3550 
3551   SDValue Mask, VL;
3552   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3553 
3554   SDValue CopySign =
3555       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
3556 
3557   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
3558 }
3559 
3560 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
3561     SDValue Op, SelectionDAG &DAG) const {
3562   MVT VT = Op.getSimpleValueType();
3563   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3564 
3565   MVT I1ContainerVT =
3566       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3567 
3568   SDValue CC =
3569       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
3570   SDValue Op1 =
3571       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3572   SDValue Op2 =
3573       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
3574 
3575   SDLoc DL(Op);
3576   SDValue Mask, VL;
3577   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3578 
3579   SDValue Select =
3580       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3581 
3582   return convertFromScalableVector(VT, Select, DAG, Subtarget);
3583 }
3584 
3585 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3586                                                unsigned NewOpc,
3587                                                bool HasMask) const {
3588   MVT VT = Op.getSimpleValueType();
3589   assert(useRVVForFixedLengthVectorVT(VT) &&
3590          "Only expected to lower fixed length vector operation!");
3591   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3592 
3593   // Create list of operands by converting existing ones to scalable types.
3594   SmallVector<SDValue, 6> Ops;
3595   for (const SDValue &V : Op->op_values()) {
3596     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3597 
3598     // Pass through non-vector operands.
3599     if (!V.getValueType().isVector()) {
3600       Ops.push_back(V);
3601       continue;
3602     }
3603 
3604     // "cast" fixed length vector to a scalable vector.
3605     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3606            "Only fixed length vectors are supported!");
3607     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3608   }
3609 
3610   SDLoc DL(Op);
3611   SDValue Mask, VL;
3612   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3613   if (HasMask)
3614     Ops.push_back(Mask);
3615   Ops.push_back(VL);
3616 
3617   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3618   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3619 }
3620 
3621 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to
3622 // a RVV indexed load. The RVV indexed load instructions only support the
3623 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3624 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3625 // indexing is extended to the XLEN value type and scaled accordingly.
3626 SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
3627   auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
3628   SDLoc DL(Op);
3629 
3630   SDValue Index = MGN->getIndex();
3631   SDValue Mask = MGN->getMask();
3632   SDValue PassThru = MGN->getPassThru();
3633 
3634   MVT VT = Op.getSimpleValueType();
3635   MVT IndexVT = Index.getSimpleValueType();
3636   MVT XLenVT = Subtarget.getXLenVT();
3637 
3638   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3639          "Unexpected VTs!");
3640   assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
3641          "Unexpected pointer type");
3642   // Targets have to explicitly opt-in for extending vector loads.
3643   assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
3644          "Unexpected extending MGATHER");
3645 
3646   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3647   // the selection of the masked intrinsics doesn't do this for us.
3648   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3649 
3650   SDValue VL;
3651   MVT ContainerVT = VT;
3652   if (VT.isFixedLengthVector()) {
3653     // We need to use the larger of the result and index type to determine the
3654     // scalable type to use so we don't increase LMUL for any operand/result.
3655     if (VT.bitsGE(IndexVT)) {
3656       ContainerVT = getContainerForFixedLengthVector(VT);
3657       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3658                                  ContainerVT.getVectorElementCount());
3659     } else {
3660       IndexVT = getContainerForFixedLengthVector(IndexVT);
3661       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
3662                                      IndexVT.getVectorElementCount());
3663     }
3664 
3665     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3666 
3667     if (!IsUnmasked) {
3668       MVT MaskVT =
3669           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3670       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3671       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
3672     }
3673 
3674     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3675   } else
3676     VL = DAG.getRegister(RISCV::X0, XLenVT);
3677 
3678   unsigned IntID =
3679       IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
3680   SmallVector<SDValue, 8> Ops{MGN->getChain(),
3681                               DAG.getTargetConstant(IntID, DL, XLenVT)};
3682   if (!IsUnmasked)
3683     Ops.push_back(PassThru);
3684   Ops.push_back(MGN->getBasePtr());
3685   Ops.push_back(Index);
3686   if (!IsUnmasked)
3687     Ops.push_back(Mask);
3688   Ops.push_back(VL);
3689 
3690   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3691   SDValue Result =
3692       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
3693                               MGN->getMemoryVT(), MGN->getMemOperand());
3694   SDValue Chain = Result.getValue(1);
3695 
3696   if (VT.isFixedLengthVector())
3697     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3698 
3699   return DAG.getMergeValues({Result, Chain}, DL);
3700 }
3701 
3702 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
3703 // a RVV indexed store. The RVV indexed store instructions only support the
3704 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3705 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3706 // indexing is extended to the XLEN value type and scaled accordingly.
3707 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
3708                                            SelectionDAG &DAG) const {
3709   auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
3710   SDLoc DL(Op);
3711   SDValue Index = MSN->getIndex();
3712   SDValue Mask = MSN->getMask();
3713   SDValue Val = MSN->getValue();
3714 
3715   MVT VT = Val.getSimpleValueType();
3716   MVT IndexVT = Index.getSimpleValueType();
3717   MVT XLenVT = Subtarget.getXLenVT();
3718 
3719   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3720          "Unexpected VTs!");
3721   assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
3722          "Unexpected pointer type");
3723   // Targets have to explicitly opt-in for extending vector loads and
3724   // truncating vector stores.
3725   assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");
3726 
3727   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3728   // the selection of the masked intrinsics doesn't do this for us.
3729   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3730 
3731   SDValue VL;
3732   if (VT.isFixedLengthVector()) {
3733     // We need to use the larger of the value and index type to determine the
3734     // scalable type to use so we don't increase LMUL for any operand/result.
3735     if (VT.bitsGE(IndexVT)) {
3736       VT = getContainerForFixedLengthVector(VT);
3737       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3738                                  VT.getVectorElementCount());
3739     } else {
3740       IndexVT = getContainerForFixedLengthVector(IndexVT);
3741       VT = MVT::getVectorVT(VT.getVectorElementType(),
3742                             IndexVT.getVectorElementCount());
3743     }
3744 
3745     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3746     Val = convertToScalableVector(VT, Val, DAG, Subtarget);
3747 
3748     if (!IsUnmasked) {
3749       MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3750       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3751     }
3752 
3753     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3754   } else
3755     VL = DAG.getRegister(RISCV::X0, XLenVT);
3756 
3757   unsigned IntID =
3758       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
3759   SmallVector<SDValue, 8> Ops{MSN->getChain(),
3760                               DAG.getTargetConstant(IntID, DL, XLenVT)};
3761   Ops.push_back(Val);
3762   Ops.push_back(MSN->getBasePtr());
3763   Ops.push_back(Index);
3764   if (!IsUnmasked)
3765     Ops.push_back(Mask);
3766   Ops.push_back(VL);
3767 
3768   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
3769                                  MSN->getMemoryVT(), MSN->getMemOperand());
3770 }
3771 
3772 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3773 // form of the given Opcode.
3774 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3775   switch (Opcode) {
3776   default:
3777     llvm_unreachable("Unexpected opcode");
3778   case ISD::SHL:
3779     return RISCVISD::SLLW;
3780   case ISD::SRA:
3781     return RISCVISD::SRAW;
3782   case ISD::SRL:
3783     return RISCVISD::SRLW;
3784   case ISD::SDIV:
3785     return RISCVISD::DIVW;
3786   case ISD::UDIV:
3787     return RISCVISD::DIVUW;
3788   case ISD::UREM:
3789     return RISCVISD::REMUW;
3790   case ISD::ROTL:
3791     return RISCVISD::ROLW;
3792   case ISD::ROTR:
3793     return RISCVISD::RORW;
3794   case RISCVISD::GREVI:
3795     return RISCVISD::GREVIW;
3796   case RISCVISD::GORCI:
3797     return RISCVISD::GORCIW;
3798   }
3799 }
3800 
3801 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3802 // Because i32 isn't a legal type for RV64, these operations would otherwise
3803 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3804 // later one because the fact the operation was originally of type i32 is
3805 // lost.
3806 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3807                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3808   SDLoc DL(N);
3809   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3810   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3811   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3812   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3813   // ReplaceNodeResults requires we maintain the same type for the return value.
3814   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3815 }
3816 
3817 // Converts the given 32-bit operation to a i64 operation with signed extension
3818 // semantic to reduce the signed extension instructions.
3819 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3820   SDLoc DL(N);
3821   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3822   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3823   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3824   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3825                                DAG.getValueType(MVT::i32));
3826   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3827 }
3828 
3829 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
3830                                              SmallVectorImpl<SDValue> &Results,
3831                                              SelectionDAG &DAG) const {
3832   SDLoc DL(N);
3833   switch (N->getOpcode()) {
3834   default:
3835     llvm_unreachable("Don't know how to custom type legalize this operation!");
3836   case ISD::STRICT_FP_TO_SINT:
3837   case ISD::STRICT_FP_TO_UINT:
3838   case ISD::FP_TO_SINT:
3839   case ISD::FP_TO_UINT: {
3840     bool IsStrict = N->isStrictFPOpcode();
3841     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3842            "Unexpected custom legalisation");
3843     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
3844     // If the FP type needs to be softened, emit a library call using the 'si'
3845     // version. If we left it to default legalization we'd end up with 'di'. If
3846     // the FP type doesn't need to be softened just let generic type
3847     // legalization promote the result type.
3848     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
3849         TargetLowering::TypeSoftenFloat)
3850       return;
3851     RTLIB::Libcall LC;
3852     if (N->getOpcode() == ISD::FP_TO_SINT ||
3853         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
3854       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
3855     else
3856       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
3857     MakeLibCallOptions CallOptions;
3858     EVT OpVT = Op0.getValueType();
3859     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
3860     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
3861     SDValue Result;
3862     std::tie(Result, Chain) =
3863         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
3864     Results.push_back(Result);
3865     if (IsStrict)
3866       Results.push_back(Chain);
3867     break;
3868   }
3869   case ISD::READCYCLECOUNTER: {
3870     assert(!Subtarget.is64Bit() &&
3871            "READCYCLECOUNTER only has custom type legalization on riscv32");
3872 
3873     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
3874     SDValue RCW =
3875         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
3876 
3877     Results.push_back(
3878         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
3879     Results.push_back(RCW.getValue(2));
3880     break;
3881   }
3882   case ISD::MUL: {
3883     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
3884     unsigned XLen = Subtarget.getXLen();
3885     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
3886     if (Size > XLen) {
3887       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
3888       SDValue LHS = N->getOperand(0);
3889       SDValue RHS = N->getOperand(1);
3890       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
3891 
3892       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
3893       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
3894       // We need exactly one side to be unsigned.
3895       if (LHSIsU == RHSIsU)
3896         return;
3897 
3898       auto MakeMULPair = [&](SDValue S, SDValue U) {
3899         MVT XLenVT = Subtarget.getXLenVT();
3900         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
3901         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
3902         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
3903         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
3904         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
3905       };
3906 
3907       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
3908       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
3909 
3910       // The other operand should be signed, but still prefer MULH when
3911       // possible.
3912       if (RHSIsU && LHSIsS && !RHSIsS)
3913         Results.push_back(MakeMULPair(LHS, RHS));
3914       else if (LHSIsU && RHSIsS && !LHSIsS)
3915         Results.push_back(MakeMULPair(RHS, LHS));
3916 
3917       return;
3918     }
3919     LLVM_FALLTHROUGH;
3920   }
3921   case ISD::ADD:
3922   case ISD::SUB:
3923     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3924            "Unexpected custom legalisation");
3925     if (N->getOperand(1).getOpcode() == ISD::Constant)
3926       return;
3927     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
3928     break;
3929   case ISD::SHL:
3930   case ISD::SRA:
3931   case ISD::SRL:
3932     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3933            "Unexpected custom legalisation");
3934     if (N->getOperand(1).getOpcode() == ISD::Constant)
3935       return;
3936     Results.push_back(customLegalizeToWOp(N, DAG));
3937     break;
3938   case ISD::ROTL:
3939   case ISD::ROTR:
3940     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3941            "Unexpected custom legalisation");
3942     Results.push_back(customLegalizeToWOp(N, DAG));
3943     break;
3944   case ISD::CTTZ:
3945   case ISD::CTTZ_ZERO_UNDEF:
3946   case ISD::CTLZ:
3947   case ISD::CTLZ_ZERO_UNDEF: {
3948     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3949            "Unexpected custom legalisation");
3950 
3951     SDValue NewOp0 =
3952         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3953     bool IsCTZ =
3954         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
3955     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
3956     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
3957     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
3958     return;
3959   }
3960   case ISD::SDIV:
3961   case ISD::UDIV:
3962   case ISD::UREM: {
3963     MVT VT = N->getSimpleValueType(0);
3964     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
3965            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
3966            "Unexpected custom legalisation");
3967     if (N->getOperand(0).getOpcode() == ISD::Constant ||
3968         N->getOperand(1).getOpcode() == ISD::Constant)
3969       return;
3970 
3971     // If the input is i32, use ANY_EXTEND since the W instructions don't read
3972     // the upper 32 bits. For other types we need to sign or zero extend
3973     // based on the opcode.
3974     unsigned ExtOpc = ISD::ANY_EXTEND;
3975     if (VT != MVT::i32)
3976       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
3977                                            : ISD::ZERO_EXTEND;
3978 
3979     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
3980     break;
3981   }
3982   case ISD::UADDO:
3983   case ISD::USUBO: {
3984     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3985            "Unexpected custom legalisation");
3986     bool IsAdd = N->getOpcode() == ISD::UADDO;
3987     // Create an ADDW or SUBW.
3988     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3989     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3990     SDValue Res =
3991         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
3992     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
3993                       DAG.getValueType(MVT::i32));
3994 
3995     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
3996     // Since the inputs are sign extended from i32, this is equivalent to
3997     // comparing the lower 32 bits.
3998     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
3999     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
4000                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
4001 
4002     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4003     Results.push_back(Overflow);
4004     return;
4005   }
4006   case ISD::UADDSAT:
4007   case ISD::USUBSAT: {
4008     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4009            "Unexpected custom legalisation");
4010     if (Subtarget.hasStdExtZbb()) {
4011       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
4012       // sign extend allows overflow of the lower 32 bits to be detected on
4013       // the promoted size.
4014       SDValue LHS =
4015           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
4016       SDValue RHS =
4017           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
4018       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
4019       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
4020       return;
4021     }
4022 
4023     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
4024     // promotion for UADDO/USUBO.
4025     Results.push_back(expandAddSubSat(N, DAG));
4026     return;
4027   }
4028   case ISD::BITCAST: {
4029     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4030              Subtarget.hasStdExtF()) ||
4031             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
4032            "Unexpected custom legalisation");
4033     SDValue Op0 = N->getOperand(0);
4034     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
4035       if (Op0.getValueType() != MVT::f16)
4036         return;
4037       SDValue FPConv =
4038           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
4039       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
4040     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4041                Subtarget.hasStdExtF()) {
4042       if (Op0.getValueType() != MVT::f32)
4043         return;
4044       SDValue FPConv =
4045           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
4046       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
4047     }
4048     break;
4049   }
4050   case RISCVISD::GREVI:
4051   case RISCVISD::GORCI: {
4052     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4053            "Unexpected custom legalisation");
4054     // This is similar to customLegalizeToWOp, except that we pass the second
4055     // operand (a TargetConstant) straight through: it is already of type
4056     // XLenVT.
4057     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
4058     SDValue NewOp0 =
4059         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4060     SDValue NewRes =
4061         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
4062     // ReplaceNodeResults requires we maintain the same type for the return
4063     // value.
4064     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4065     break;
4066   }
4067   case RISCVISD::SHFLI: {
4068     // There is no SHFLIW instruction, but we can just promote the operation.
4069     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4070            "Unexpected custom legalisation");
4071     SDValue NewOp0 =
4072         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4073     SDValue NewRes =
4074         DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
4075     // ReplaceNodeResults requires we maintain the same type for the return
4076     // value.
4077     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
4078     break;
4079   }
4080   case ISD::BSWAP:
4081   case ISD::BITREVERSE: {
4082     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4083            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
4084     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4085                                  N->getOperand(0));
4086     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
4087     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
4088                                  DAG.getTargetConstant(Imm, DL,
4089                                                        Subtarget.getXLenVT()));
4090     // ReplaceNodeResults requires we maintain the same type for the return
4091     // value.
4092     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
4093     break;
4094   }
4095   case ISD::FSHL:
4096   case ISD::FSHR: {
4097     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4098            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
4099     SDValue NewOp0 =
4100         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4101     SDValue NewOp1 =
4102         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4103     SDValue NewOp2 =
4104         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
4105     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
4106     // Mask the shift amount to 5 bits.
4107     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
4108                          DAG.getConstant(0x1f, DL, MVT::i64));
4109     unsigned Opc =
4110         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
4111     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
4112     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
4113     break;
4114   }
4115   case ISD::EXTRACT_VECTOR_ELT: {
4116     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
4117     // type is illegal (currently only vXi64 RV32).
4118     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
4119     // transferred to the destination register. We issue two of these from the
4120     // upper- and lower- halves of the SEW-bit vector element, slid down to the
4121     // first element.
4122     SDValue Vec = N->getOperand(0);
4123     SDValue Idx = N->getOperand(1);
4124 
4125     // The vector type hasn't been legalized yet so we can't issue target
4126     // specific nodes if it needs legalization.
4127     // FIXME: We would manually legalize if it's important.
4128     if (!isTypeLegal(Vec.getValueType()))
4129       return;
4130 
4131     MVT VecVT = Vec.getSimpleValueType();
4132 
4133     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
4134            VecVT.getVectorElementType() == MVT::i64 &&
4135            "Unexpected EXTRACT_VECTOR_ELT legalization");
4136 
4137     // If this is a fixed vector, we need to convert it to a scalable vector.
4138     MVT ContainerVT = VecVT;
4139     if (VecVT.isFixedLengthVector()) {
4140       ContainerVT = getContainerForFixedLengthVector(VecVT);
4141       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4142     }
4143 
4144     MVT XLenVT = Subtarget.getXLenVT();
4145 
4146     // Use a VL of 1 to avoid processing more elements than we need.
4147     MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
4148     SDValue VL = DAG.getConstant(1, DL, XLenVT);
4149     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4150 
4151     // Unless the index is known to be 0, we must slide the vector down to get
4152     // the desired element into index 0.
4153     if (!isNullConstant(Idx)) {
4154       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4155                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
4156     }
4157 
4158     // Extract the lower XLEN bits of the correct vector element.
4159     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4160 
4161     // To extract the upper XLEN bits of the vector element, shift the first
4162     // element right by 32 bits and re-extract the lower XLEN bits.
4163     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4164                                      DAG.getConstant(32, DL, XLenVT), VL);
4165     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
4166                                  ThirtyTwoV, Mask, VL);
4167 
4168     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
4169 
4170     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
4171     break;
4172   }
4173   case ISD::INTRINSIC_WO_CHAIN: {
4174     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4175     switch (IntNo) {
4176     default:
4177       llvm_unreachable(
4178           "Don't know how to custom type legalize this intrinsic!");
4179     case Intrinsic::riscv_vmv_x_s: {
4180       EVT VT = N->getValueType(0);
4181       MVT XLenVT = Subtarget.getXLenVT();
4182       if (VT.bitsLT(XLenVT)) {
4183         // Simple case just extract using vmv.x.s and truncate.
4184         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
4185                                       Subtarget.getXLenVT(), N->getOperand(1));
4186         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
4187         return;
4188       }
4189 
4190       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
4191              "Unexpected custom legalization");
4192 
4193       // We need to do the move in two steps.
4194       SDValue Vec = N->getOperand(1);
4195       MVT VecVT = Vec.getSimpleValueType();
4196 
4197       // First extract the lower XLEN bits of the element.
4198       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4199 
4200       // To extract the upper XLEN bits of the vector element, shift the first
4201       // element right by 32 bits and re-extract the lower XLEN bits.
4202       SDValue VL = DAG.getConstant(1, DL, XLenVT);
4203       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
4204       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4205       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
4206                                        DAG.getConstant(32, DL, XLenVT), VL);
4207       SDValue LShr32 =
4208           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
4209       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
4210 
4211       Results.push_back(
4212           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
4213       break;
4214     }
4215     }
4216     break;
4217   }
4218   case ISD::VECREDUCE_ADD:
4219   case ISD::VECREDUCE_AND:
4220   case ISD::VECREDUCE_OR:
4221   case ISD::VECREDUCE_XOR:
4222   case ISD::VECREDUCE_SMAX:
4223   case ISD::VECREDUCE_UMAX:
4224   case ISD::VECREDUCE_SMIN:
4225   case ISD::VECREDUCE_UMIN:
4226     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
4227       Results.push_back(V);
4228     break;
4229   }
4230 }
4231 
4232 // A structure to hold one of the bit-manipulation patterns below. Together, a
4233 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
4234 //   (or (and (shl x, 1), 0xAAAAAAAA),
4235 //       (and (srl x, 1), 0x55555555))
4236 struct RISCVBitmanipPat {
4237   SDValue Op;
4238   unsigned ShAmt;
4239   bool IsSHL;
4240 
4241   bool formsPairWith(const RISCVBitmanipPat &Other) const {
4242     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
4243   }
4244 };
4245 
4246 // Matches patterns of the form
4247 //   (and (shl x, C2), (C1 << C2))
4248 //   (and (srl x, C2), C1)
4249 //   (shl (and x, C1), C2)
4250 //   (srl (and x, (C1 << C2)), C2)
4251 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
4252 // The expected masks for each shift amount are specified in BitmanipMasks where
4253 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
4254 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
4255 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
4256 // XLen is 64.
4257 static Optional<RISCVBitmanipPat>
4258 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
4259   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
4260          "Unexpected number of masks");
4261   Optional<uint64_t> Mask;
4262   // Optionally consume a mask around the shift operation.
4263   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
4264     Mask = Op.getConstantOperandVal(1);
4265     Op = Op.getOperand(0);
4266   }
4267   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
4268     return None;
4269   bool IsSHL = Op.getOpcode() == ISD::SHL;
4270 
4271   if (!isa<ConstantSDNode>(Op.getOperand(1)))
4272     return None;
4273   uint64_t ShAmt = Op.getConstantOperandVal(1);
4274 
4275   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
4276   if (ShAmt >= Width && !isPowerOf2_64(ShAmt))
4277     return None;
4278   // If we don't have enough masks for 64 bit, then we must be trying to
4279   // match SHFL so we're only allowed to shift 1/4 of the width.
4280   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
4281     return None;
4282 
4283   SDValue Src = Op.getOperand(0);
4284 
4285   // The expected mask is shifted left when the AND is found around SHL
4286   // patterns.
4287   //   ((x >> 1) & 0x55555555)
4288   //   ((x << 1) & 0xAAAAAAAA)
4289   bool SHLExpMask = IsSHL;
4290 
4291   if (!Mask) {
4292     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
4293     // the mask is all ones: consume that now.
4294     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
4295       Mask = Src.getConstantOperandVal(1);
4296       Src = Src.getOperand(0);
4297       // The expected mask is now in fact shifted left for SRL, so reverse the
4298       // decision.
4299       //   ((x & 0xAAAAAAAA) >> 1)
4300       //   ((x & 0x55555555) << 1)
4301       SHLExpMask = !SHLExpMask;
4302     } else {
4303       // Use a default shifted mask of all-ones if there's no AND, truncated
4304       // down to the expected width. This simplifies the logic later on.
4305       Mask = maskTrailingOnes<uint64_t>(Width);
4306       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
4307     }
4308   }
4309 
4310   unsigned MaskIdx = Log2_32(ShAmt);
4311   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4312 
4313   if (SHLExpMask)
4314     ExpMask <<= ShAmt;
4315 
4316   if (Mask != ExpMask)
4317     return None;
4318 
4319   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
4320 }
4321 
4322 // Matches any of the following bit-manipulation patterns:
4323 //   (and (shl x, 1), (0x55555555 << 1))
4324 //   (and (srl x, 1), 0x55555555)
4325 //   (shl (and x, 0x55555555), 1)
4326 //   (srl (and x, (0x55555555 << 1)), 1)
4327 // where the shift amount and mask may vary thus:
4328 //   [1]  = 0x55555555 / 0xAAAAAAAA
4329 //   [2]  = 0x33333333 / 0xCCCCCCCC
4330 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
4331 //   [8]  = 0x00FF00FF / 0xFF00FF00
4332 //   [16] = 0x0000FFFF / 0xFFFFFFFF
4333 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
4334 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
4335   // These are the unshifted masks which we use to match bit-manipulation
4336   // patterns. They may be shifted left in certain circumstances.
4337   static const uint64_t BitmanipMasks[] = {
4338       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
4339       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
4340 
4341   return matchRISCVBitmanipPat(Op, BitmanipMasks);
4342 }
4343 
4344 // Match the following pattern as a GREVI(W) operation
4345 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
4346 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
4347                                const RISCVSubtarget &Subtarget) {
4348   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
4349   EVT VT = Op.getValueType();
4350 
4351   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4352     auto LHS = matchGREVIPat(Op.getOperand(0));
4353     auto RHS = matchGREVIPat(Op.getOperand(1));
4354     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
4355       SDLoc DL(Op);
4356       return DAG.getNode(
4357           RISCVISD::GREVI, DL, VT, LHS->Op,
4358           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4359     }
4360   }
4361   return SDValue();
4362 }
4363 
4364 // Matches any the following pattern as a GORCI(W) operation
4365 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
4366 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
4367 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
4368 // Note that with the variant of 3.,
4369 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
4370 // the inner pattern will first be matched as GREVI and then the outer
4371 // pattern will be matched to GORC via the first rule above.
4372 // 4.  (or (rotl/rotr x, bitwidth/2), x)
4373 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
4374                                const RISCVSubtarget &Subtarget) {
4375   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
4376   EVT VT = Op.getValueType();
4377 
4378   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
4379     SDLoc DL(Op);
4380     SDValue Op0 = Op.getOperand(0);
4381     SDValue Op1 = Op.getOperand(1);
4382 
4383     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
4384       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
4385           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
4386         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
4387       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
4388       if ((Reverse.getOpcode() == ISD::ROTL ||
4389            Reverse.getOpcode() == ISD::ROTR) &&
4390           Reverse.getOperand(0) == X &&
4391           isa<ConstantSDNode>(Reverse.getOperand(1))) {
4392         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
4393         if (RotAmt == (VT.getSizeInBits() / 2))
4394           return DAG.getNode(
4395               RISCVISD::GORCI, DL, VT, X,
4396               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
4397       }
4398       return SDValue();
4399     };
4400 
4401     // Check for either commutable permutation of (or (GREVI x, shamt), x)
4402     if (SDValue V = MatchOROfReverse(Op0, Op1))
4403       return V;
4404     if (SDValue V = MatchOROfReverse(Op1, Op0))
4405       return V;
4406 
4407     // OR is commutable so canonicalize its OR operand to the left
4408     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
4409       std::swap(Op0, Op1);
4410     if (Op0.getOpcode() != ISD::OR)
4411       return SDValue();
4412     SDValue OrOp0 = Op0.getOperand(0);
4413     SDValue OrOp1 = Op0.getOperand(1);
4414     auto LHS = matchGREVIPat(OrOp0);
4415     // OR is commutable so swap the operands and try again: x might have been
4416     // on the left
4417     if (!LHS) {
4418       std::swap(OrOp0, OrOp1);
4419       LHS = matchGREVIPat(OrOp0);
4420     }
4421     auto RHS = matchGREVIPat(Op1);
4422     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
4423       return DAG.getNode(
4424           RISCVISD::GORCI, DL, VT, LHS->Op,
4425           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
4426     }
4427   }
4428   return SDValue();
4429 }
4430 
4431 // Matches any of the following bit-manipulation patterns:
4432 //   (and (shl x, 1), (0x22222222 << 1))
4433 //   (and (srl x, 1), 0x22222222)
4434 //   (shl (and x, 0x22222222), 1)
4435 //   (srl (and x, (0x22222222 << 1)), 1)
4436 // where the shift amount and mask may vary thus:
4437 //   [1]  = 0x22222222 / 0x44444444
4438 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
4439 //   [4]  = 0x00F000F0 / 0x0F000F00
4440 //   [8]  = 0x0000FF00 / 0x00FF0000
4441 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
4442 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
4443   // These are the unshifted masks which we use to match bit-manipulation
4444   // patterns. They may be shifted left in certain circumstances.
4445   static const uint64_t BitmanipMasks[] = {
4446       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
4447       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
4448 
4449   return matchRISCVBitmanipPat(Op, BitmanipMasks);
4450 }
4451 
4452 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
4453 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
4454                                const RISCVSubtarget &Subtarget) {
4455   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
4456   EVT VT = Op.getValueType();
4457 
4458   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
4459     return SDValue();
4460 
4461   SDValue Op0 = Op.getOperand(0);
4462   SDValue Op1 = Op.getOperand(1);
4463 
4464   // Or is commutable so canonicalize the second OR to the LHS.
4465   if (Op0.getOpcode() != ISD::OR)
4466     std::swap(Op0, Op1);
4467   if (Op0.getOpcode() != ISD::OR)
4468     return SDValue();
4469 
4470   // We found an inner OR, so our operands are the operands of the inner OR
4471   // and the other operand of the outer OR.
4472   SDValue A = Op0.getOperand(0);
4473   SDValue B = Op0.getOperand(1);
4474   SDValue C = Op1;
4475 
4476   auto Match1 = matchSHFLPat(A);
4477   auto Match2 = matchSHFLPat(B);
4478 
4479   // If neither matched, we failed.
4480   if (!Match1 && !Match2)
4481     return SDValue();
4482 
4483   // We had at least one match. if one failed, try the remaining C operand.
4484   if (!Match1) {
4485     std::swap(A, C);
4486     Match1 = matchSHFLPat(A);
4487     if (!Match1)
4488       return SDValue();
4489   } else if (!Match2) {
4490     std::swap(B, C);
4491     Match2 = matchSHFLPat(B);
4492     if (!Match2)
4493       return SDValue();
4494   }
4495   assert(Match1 && Match2);
4496 
4497   // Make sure our matches pair up.
4498   if (!Match1->formsPairWith(*Match2))
4499     return SDValue();
4500 
4501   // All the remains is to make sure C is an AND with the same input, that masks
4502   // out the bits that are being shuffled.
4503   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
4504       C.getOperand(0) != Match1->Op)
4505     return SDValue();
4506 
4507   uint64_t Mask = C.getConstantOperandVal(1);
4508 
4509   static const uint64_t BitmanipMasks[] = {
4510       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
4511       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
4512   };
4513 
4514   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
4515   unsigned MaskIdx = Log2_32(Match1->ShAmt);
4516   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
4517 
4518   if (Mask != ExpMask)
4519     return SDValue();
4520 
4521   SDLoc DL(Op);
4522   return DAG.getNode(
4523       RISCVISD::SHFLI, DL, VT, Match1->Op,
4524       DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
4525 }
4526 
4527 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
4528 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
4529 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
4530 // not undo itself, but they are redundant.
4531 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
4532   unsigned ShAmt1 = N->getConstantOperandVal(1);
4533   SDValue Src = N->getOperand(0);
4534 
4535   if (Src.getOpcode() != N->getOpcode())
4536     return SDValue();
4537 
4538   unsigned ShAmt2 = Src.getConstantOperandVal(1);
4539   Src = Src.getOperand(0);
4540 
4541   unsigned CombinedShAmt;
4542   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
4543     CombinedShAmt = ShAmt1 | ShAmt2;
4544   else
4545     CombinedShAmt = ShAmt1 ^ ShAmt2;
4546 
4547   if (CombinedShAmt == 0)
4548     return Src;
4549 
4550   SDLoc DL(N);
4551   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
4552                      DAG.getTargetConstant(CombinedShAmt, DL,
4553                                            N->getOperand(1).getValueType()));
4554 }
4555 
// Attempts RISC-V specific DAG combines on N, dispatching on its opcode.
// Returns the value that should replace N (or N itself when one of its
// operands was simplified in place), or an empty SDValue when no combine
// applies.
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    // Only done when the fneg/fabs has a single use.
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    // The sign bit of the double lives in the high 32-bit half, so only Hi is
    // modified.
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // An operand was simplified; revisit N unless it has been deleted.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      // An operand was simplified; revisit N unless it has been deleted.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      // An operand was simplified; revisit N unless it has been deleted.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
    // read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      // An operand was simplified; revisit N unless it has been deleted.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      // An operand was simplified; revisit N unless it has been deleted.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // Otherwise try to merge with a nested node of the same opcode.
    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    // Only done when the fneg/fabs has a single use.
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // The 32-bit sign mask, sign-extended to the 64-bit result type.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI:
    return combineGREVI_GORCI(N, DCI.DAG);
  case ISD::OR:
    // Try the Zbp bit-manipulation patterns in turn: GREV, then GORC, then
    // SHFL.
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
      return SHFL;
    break;
  case RISCVISD::SELECT_CC: {
    // Simplify select_cc nodes whose condition is an integer eq/ne
    // comparison. Operands are (LHS, RHS, CC, trueV, falseV).
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
    //      (select_cc X, Y, lt, trueV, falseV)
    // Sometimes the setcc is introduced after select_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
    //      (select_cc X, Y, eq/ne, trueV, falseV)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                         {LHS.getOperand(0), LHS.getOperand(1),
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4)});
    // (select_cc X, 1, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    // Mask covers every bit of LHS except bit 0.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    break;
  }
  case RISCVISD::BR_CC: {
    // Simplify br_cc nodes whose condition is an integer eq/ne comparison.
    // Operands are (chain, LHS, RHS, CC, dest).
    SDValue LHS = N->getOperand(1);
    SDValue RHS = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
    //      (br_cc X, Y, lt, dest)
    // Sometimes the setcc is introduced after br_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
                         N->getOperand(4));
    }

    // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
    //      (br_cc X, Y, eq/ne, dest)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
                         N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
                         N->getOperand(3), N->getOperand(4));

    // (br_cc X, 1, setne, dest) ->
    // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    // Mask covers every bit of LHS except bit 0.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC = DAG.getCondCode(CCVal);
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, TargetCC,
                         N->getOperand(4));
    }
    break;
  }
  case ISD::FCOPYSIGN: {
    // Only vector FCOPYSIGN is handled here.
    EVT VT = N->getValueType(0);
    if (!VT.isVector())
      break;
    // There is a form of VFSGNJ which injects the negated sign of its second
    // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where FP_ROUND and
    // TRUNC=1.
    SDValue In2 = N->getOperand(1);
    // Avoid cases where the extend/round has multiple uses, as duplicating
    // those is typically more expensive than removing a fneg.
    if (!In2.hasOneUse())
      break;
    if (In2.getOpcode() != ISD::FP_EXTEND &&
        (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
      break;
    // Look through the extend/round; its input must be the FNEG.
    In2 = In2.getOperand(0);
    if (In2.getOpcode() != ISD::FNEG)
      break;
    SDLoc DL(N);
    // Rebuild as (fcopysign x, (fneg (fpext/fpround y))).
    SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                       DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
  }
  case ISD::MGATHER:
  case ISD::MSCATTER: {
    // Index legalization is only attempted before type/operation legalization.
    if (!DCI.isBeforeLegalize())
      break;
    MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
    SDValue Index = MGSN->getIndex();
    EVT IndexVT = Index.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    // RISCV indexed loads only support the "unsigned unscaled" addressing
    // mode, so anything else must be manually legalized.
    bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
                                (MGSN->isIndexSigned() &&
                                 IndexVT.getVectorElementType().bitsLT(XLenVT));
    if (!NeedsIdxLegalization)
      break;

    SDLoc DL(N);

    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
      IndexVT = IndexVT.changeVectorElementType(XLenVT);
      Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
                                                : ISD::ZERO_EXTEND,
                          DL, IndexVT, Index);
    }

    // Operand 5 of the gather/scatter node is read as the scale.
    unsigned Scale = N->getConstantOperandVal(5);
    if (MGSN->isIndexScaled() && Scale != 1) {
      // Manually scale the indices by the element size.
      // TODO: Sanitize the scale operand here?
      assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
      SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
      Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
    }

    // Rebuild the node with the legalized index and the unsigned/unscaled
    // index type; all other operands are carried over unchanged.
    ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
    if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
      return DAG.getMaskedGather(
          N->getVTList(), MGSN->getMemoryVT(), DL,
          {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
           MGSN->getBasePtr(), Index, MGN->getScale()},
          MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
    }
    const auto *MSN = cast<MaskedScatterSDNode>(N);
    return DAG.getMaskedScatter(
        N->getVTList(), MGSN->getMemoryVT(), DL,
        {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
         Index, MGSN->getScale()},
        MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
  }
  }

  return SDValue();
}
4906 
4907 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
4908     const SDNode *N, CombineLevel Level) const {
4909   // The following folds are only desirable if `(OP _, c1 << c2)` can be
4910   // materialised in fewer instructions than `(OP _, c1)`:
4911   //
4912   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4913   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
4914   SDValue N0 = N->getOperand(0);
4915   EVT Ty = N0.getValueType();
4916   if (Ty.isScalarInteger() &&
4917       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
4918     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4919     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
4920     if (C1 && C2) {
4921       const APInt &C1Int = C1->getAPIntValue();
4922       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
4923 
4924       // We can materialise `c1 << c2` into an add immediate, so it's "free",
4925       // and the combine should happen, to potentially allow further combines
4926       // later.
4927       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
4928           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
4929         return true;
4930 
4931       // We can materialise `c1` in an add immediate, so it's "free", and the
4932       // combine should be prevented.
4933       if (C1Int.getMinSignedBits() <= 64 &&
4934           isLegalAddImmediate(C1Int.getSExtValue()))
4935         return false;
4936 
4937       // Neither constant will fit into an immediate, so find materialisation
4938       // costs.
4939       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
4940                                               Subtarget.is64Bit());
4941       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
4942           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
4943 
4944       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
4945       // combine should be prevented.
4946       if (C1Cost < ShiftedC1Cost)
4947         return false;
4948     }
4949   }
4950   return true;
4951 }
4952 
4953 bool RISCVTargetLowering::targetShrinkDemandedConstant(
4954     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4955     TargetLoweringOpt &TLO) const {
4956   // Delay this optimization as late as possible.
4957   if (!TLO.LegalOps)
4958     return false;
4959 
4960   EVT VT = Op.getValueType();
4961   if (VT.isVector())
4962     return false;
4963 
4964   // Only handle AND for now.
4965   if (Op.getOpcode() != ISD::AND)
4966     return false;
4967 
4968   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4969   if (!C)
4970     return false;
4971 
4972   const APInt &Mask = C->getAPIntValue();
4973 
4974   // Clear all non-demanded bits initially.
4975   APInt ShrunkMask = Mask & DemandedBits;
4976 
4977   // Try to make a smaller immediate by setting undemanded bits.
4978 
4979   APInt ExpandedMask = Mask | ~DemandedBits;
4980 
4981   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
4982     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
4983   };
4984   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
4985     if (NewMask == Mask)
4986       return true;
4987     SDLoc DL(Op);
4988     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
4989     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
4990     return TLO.CombineTo(Op, NewOp);
4991   };
4992 
4993   // If the shrunk mask fits in sign extended 12 bits, let the target
4994   // independent code apply it.
4995   if (ShrunkMask.isSignedIntN(12))
4996     return false;
4997 
4998   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
4999   if (VT == MVT::i64) {
5000     APInt NewMask = APInt(64, 0xffffffff);
5001     if (IsLegalMask(NewMask))
5002       return UseMask(NewMask);
5003   }
5004 
5005   // For the remaining optimizations, we need to be able to make a negative
5006   // number through a combination of mask and undemanded bits.
5007   if (!ExpandedMask.isNegative())
5008     return false;
5009 
5010   // What is the fewest number of bits we need to represent the negative number.
5011   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
5012 
5013   // Try to make a 12 bit negative immediate. If that fails try to make a 32
5014   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
5015   APInt NewMask = ShrunkMask;
5016   if (MinSignedBits <= 12)
5017     NewMask.setBitsFrom(11);
5018   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
5019     NewMask.setBitsFrom(31);
5020   else
5021     return false;
5022 
5023   // Sanity check that our new mask is a subset of the demanded mask.
5024   assert(IsLegalMask(NewMask));
5025   return UseMask(NewMask);
5026 }
5027 
5028 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
5029                                                         KnownBits &Known,
5030                                                         const APInt &DemandedElts,
5031                                                         const SelectionDAG &DAG,
5032                                                         unsigned Depth) const {
5033   unsigned BitWidth = Known.getBitWidth();
5034   unsigned Opc = Op.getOpcode();
5035   assert((Opc >= ISD::BUILTIN_OP_END ||
5036           Opc == ISD::INTRINSIC_WO_CHAIN ||
5037           Opc == ISD::INTRINSIC_W_CHAIN ||
5038           Opc == ISD::INTRINSIC_VOID) &&
5039          "Should use MaskedValueIsZero if you don't know whether Op"
5040          " is a target node!");
5041 
5042   Known.resetAll();
5043   switch (Opc) {
5044   default: break;
5045   case RISCVISD::SELECT_CC: {
5046     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
5047     // If we don't know any bits, early out.
5048     if (Known.isUnknown())
5049       break;
5050     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
5051 
5052     // Only known if known in both the LHS and RHS.
5053     Known = KnownBits::commonBits(Known, Known2);
5054     break;
5055   }
5056   case RISCVISD::REMUW: {
5057     KnownBits Known2;
5058     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
5059     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
5060     // We only care about the lower 32 bits.
5061     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
5062     // Restore the original width by sign extending.
5063     Known = Known.sext(BitWidth);
5064     break;
5065   }
5066   case RISCVISD::DIVUW: {
5067     KnownBits Known2;
5068     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
5069     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
5070     // We only care about the lower 32 bits.
5071     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
5072     // Restore the original width by sign extending.
5073     Known = Known.sext(BitWidth);
5074     break;
5075   }
5076   case RISCVISD::CTZW: {
5077     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
5078     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
5079     unsigned LowBits = Log2_32(PossibleTZ) + 1;
5080     Known.Zero.setBitsFrom(LowBits);
5081     break;
5082   }
5083   case RISCVISD::CLZW: {
5084     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
5085     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
5086     unsigned LowBits = Log2_32(PossibleLZ) + 1;
5087     Known.Zero.setBitsFrom(LowBits);
5088     break;
5089   }
5090   case RISCVISD::READ_VLENB:
5091     // We assume VLENB is at least 8 bytes.
5092     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
5093     Known.Zero.setLowBits(3);
5094     break;
5095   }
5096 }
5097 
5098 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
5099     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
5100     unsigned Depth) const {
5101   switch (Op.getOpcode()) {
5102   default:
5103     break;
5104   case RISCVISD::SLLW:
5105   case RISCVISD::SRAW:
5106   case RISCVISD::SRLW:
5107   case RISCVISD::DIVW:
5108   case RISCVISD::DIVUW:
5109   case RISCVISD::REMUW:
5110   case RISCVISD::ROLW:
5111   case RISCVISD::RORW:
5112   case RISCVISD::GREVIW:
5113   case RISCVISD::GORCIW:
5114   case RISCVISD::FSLW:
5115   case RISCVISD::FSRW:
5116     // TODO: As the result is sign-extended, this is conservatively correct. A
5117     // more precise answer could be calculated for SRAW depending on known
5118     // bits in the shift amount.
5119     return 33;
5120   case RISCVISD::SHFLI: {
5121     // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
5122     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
5123     // will stay within the upper 32 bits. If there were more than 32 sign bits
5124     // before there will be at least 33 sign bits after.
5125     if (Op.getValueType() == MVT::i64 &&
5126         (Op.getConstantOperandVal(1) & 0x10) == 0) {
5127       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
5128       if (Tmp > 32)
5129         return 33;
5130     }
5131     break;
5132   }
5133   case RISCVISD::VMV_X_S:
5134     // The number of sign bits of the scalar result is computed by obtaining the
5135     // element type of the input vector operand, subtracting its width from the
5136     // XLEN, and then adding one (sign bit within the element type). If the
5137     // element type is wider than XLen, the least-significant XLEN bits are
5138     // taken.
5139     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
5140       return 1;
5141     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
5142   }
5143 
5144   return 1;
5145 }
5146 
5147 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
5148                                                   MachineBasicBlock *BB) {
5149   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
5150 
5151   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
5152   // Should the count have wrapped while it was being read, we need to try
5153   // again.
5154   // ...
5155   // read:
5156   // rdcycleh x3 # load high word of cycle
5157   // rdcycle  x2 # load low word of cycle
5158   // rdcycleh x4 # load high word of cycle
5159   // bne x3, x4, read # check if high word reads match, otherwise try again
5160   // ...
5161 
5162   MachineFunction &MF = *BB->getParent();
5163   const BasicBlock *LLVM_BB = BB->getBasicBlock();
5164   MachineFunction::iterator It = ++BB->getIterator();
5165 
5166   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
5167   MF.insert(It, LoopMBB);
5168 
5169   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
5170   MF.insert(It, DoneMBB);
5171 
5172   // Transfer the remainder of BB and its successor edges to DoneMBB.
5173   DoneMBB->splice(DoneMBB->begin(), BB,
5174                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
5175   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
5176 
5177   BB->addSuccessor(LoopMBB);
5178 
5179   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5180   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5181   Register LoReg = MI.getOperand(0).getReg();
5182   Register HiReg = MI.getOperand(1).getReg();
5183   DebugLoc DL = MI.getDebugLoc();
5184 
5185   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
5186   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
5187       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
5188       .addReg(RISCV::X0);
5189   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
5190       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
5191       .addReg(RISCV::X0);
5192   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
5193       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
5194       .addReg(RISCV::X0);
5195 
5196   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
5197       .addReg(HiReg)
5198       .addReg(ReadAgainReg)
5199       .addMBB(LoopMBB);
5200 
5201   LoopMBB->addSuccessor(LoopMBB);
5202   LoopMBB->addSuccessor(DoneMBB);
5203 
5204   MI.eraseFromParent();
5205 
5206   return DoneMBB;
5207 }
5208 
5209 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
5210                                              MachineBasicBlock *BB) {
5211   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
5212 
5213   MachineFunction &MF = *BB->getParent();
5214   DebugLoc DL = MI.getDebugLoc();
5215   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
5216   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
5217   Register LoReg = MI.getOperand(0).getReg();
5218   Register HiReg = MI.getOperand(1).getReg();
5219   Register SrcReg = MI.getOperand(2).getReg();
5220   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
5221   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
5222 
5223   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
5224                           RI);
5225   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
5226   MachineMemOperand *MMOLo =
5227       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
5228   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
5229       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
5230   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
5231       .addFrameIndex(FI)
5232       .addImm(0)
5233       .addMemOperand(MMOLo);
5234   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
5235       .addFrameIndex(FI)
5236       .addImm(4)
5237       .addMemOperand(MMOHi);
5238   MI.eraseFromParent(); // The pseudo instruction is gone now.
5239   return BB;
5240 }
5241 
5242 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
5243                                                  MachineBasicBlock *BB) {
5244   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
5245          "Unexpected instruction");
5246 
5247   MachineFunction &MF = *BB->getParent();
5248   DebugLoc DL = MI.getDebugLoc();
5249   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
5250   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
5251   Register DstReg = MI.getOperand(0).getReg();
5252   Register LoReg = MI.getOperand(1).getReg();
5253   Register HiReg = MI.getOperand(2).getReg();
5254   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
5255   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
5256 
5257   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
5258   MachineMemOperand *MMOLo =
5259       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
5260   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
5261       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
5262   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
5263       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
5264       .addFrameIndex(FI)
5265       .addImm(0)
5266       .addMemOperand(MMOLo);
5267   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
5268       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
5269       .addFrameIndex(FI)
5270       .addImm(4)
5271       .addMemOperand(MMOHi);
5272   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
5273   MI.eraseFromParent(); // The pseudo instruction is gone now.
5274   return BB;
5275 }
5276 
5277 static bool isSelectPseudo(MachineInstr &MI) {
5278   switch (MI.getOpcode()) {
5279   default:
5280     return false;
5281   case RISCV::Select_GPR_Using_CC_GPR:
5282   case RISCV::Select_FPR16_Using_CC_GPR:
5283   case RISCV::Select_FPR32_Using_CC_GPR:
5284   case RISCV::Select_FPR64_Using_CC_GPR:
5285     return true;
5286   }
5287 }
5288 
// Expands the Select_* pseudo MI, together with any following select pseudos
// that can share its control flow, into a conditional branch plus PHI nodes.
// BB is split around the sequence; the returned block (TailMBB) holds the
// PHIs followed by the instructions that came after the last expanded select.
//
// Operand layout read from each select pseudo below: 0 = result register,
// 1 = LHS, 2 = RHS, 3 = condition code immediate, 4 = value used on the edge
// from HeadMBB, 5 = value used on the edge from IfFalseMBB.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  // Debug values attached to the selects in the sequence; moved to TailMBB
  // further down.
  SmallVector<MachineInstr *, 4> SelectDebugValues;
  // Result registers of every select accepted into the sequence so far.
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI (inclusive) to find the last select that can share
  // this control flow. The scan stops at the first instruction that breaks
  // one of the rules listed above.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // A select only joins the sequence if it has the same condition and
      // neither of its value operands is the result of an earlier select.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // A non-select instruction may stay inside the sequence only if it has
      // no unmodeled side effects, does not touch memory, and does not use
      // any select result.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  // Layout order after insertion: HeadMBB, IfFalseMBB, TailMBB.
  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  // NOTE(review): getBranchOpcodeForIntCondCode is defined outside this
  // chunk; presumably it maps CC to the matching conditional branch opcode.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  // The branch is appended to HeadMBB and targets TailMBB directly.
  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    // Fetch the successor first: the current instruction may be erased below.
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHIs were just created, so the function can no longer claim NoPHIs.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
5411 
5412 static MachineInstr *elideCopies(MachineInstr *MI,
5413                                  const MachineRegisterInfo &MRI) {
5414   while (true) {
5415     if (!MI->isFullCopy())
5416       return MI;
5417     if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
5418       return nullptr;
5419     MI = MRI.getVRegDef(MI->getOperand(1).getReg());
5420     if (!MI)
5421       return nullptr;
5422   }
5423 }
5424 
5425 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
5426                                     int VLIndex, unsigned SEWIndex,
5427                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
5428   MachineFunction &MF = *BB->getParent();
5429   DebugLoc DL = MI.getDebugLoc();
5430   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
5431 
5432   unsigned SEW = MI.getOperand(SEWIndex).getImm();
5433   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
5434   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
5435 
5436   MachineRegisterInfo &MRI = MF.getRegInfo();
5437 
5438   auto BuildVSETVLI = [&]() {
5439     if (VLIndex >= 0) {
5440       Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
5441       Register VLReg = MI.getOperand(VLIndex).getReg();
5442 
5443       // VL might be a compile time constant, but isel would have to put it
5444       // in a register. See if VL comes from an ADDI X0, imm.
5445       if (VLReg.isVirtual()) {
5446         MachineInstr *Def = MRI.getVRegDef(VLReg);
5447         if (Def && Def->getOpcode() == RISCV::ADDI &&
5448             Def->getOperand(1).getReg() == RISCV::X0 &&
5449             Def->getOperand(2).isImm()) {
5450           uint64_t Imm = Def->getOperand(2).getImm();
5451           // VSETIVLI allows a 5-bit zero extended immediate.
5452           if (isUInt<5>(Imm))
5453             return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
5454                 .addReg(DestReg, RegState::Define | RegState::Dead)
5455                 .addImm(Imm);
5456         }
5457       }
5458 
5459       return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
5460           .addReg(DestReg, RegState::Define | RegState::Dead)
5461           .addReg(VLReg);
5462     }
5463 
5464     // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
5465     return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
5466         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
5467         .addReg(RISCV::X0, RegState::Kill);
5468   };
5469 
5470   MachineInstrBuilder MIB = BuildVSETVLI();
5471 
5472   // Default to tail agnostic unless the destination is tied to a source. In
5473   // that case the user would have some control over the tail values. The tail
5474   // policy is also ignored on instructions that only update element 0 like
5475   // vmv.s.x or reductions so use agnostic there to match the common case.
5476   // FIXME: This is conservatively correct, but we might want to detect that
5477   // the input is undefined.
5478   bool TailAgnostic = true;
5479   unsigned UseOpIdx;
5480   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
5481     TailAgnostic = false;
5482     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
5483     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
5484     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
5485     if (UseMI) {
5486       UseMI = elideCopies(UseMI, MRI);
5487       if (UseMI && UseMI->isImplicitDef())
5488         TailAgnostic = true;
5489     }
5490   }
5491 
5492   // For simplicity we reuse the vtype representation here.
5493   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
5494                                      /*TailAgnostic*/ TailAgnostic,
5495                                      /*MaskAgnostic*/ false));
5496 
5497   // Remove (now) redundant operands from pseudo
5498   if (VLIndex >= 0) {
5499     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
5500     MI.getOperand(VLIndex).setIsKill(false);
5501   }
5502 
5503   return BB;
5504 }
5505 
5506 MachineBasicBlock *
5507 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
5508                                                  MachineBasicBlock *BB) const {
5509   uint64_t TSFlags = MI.getDesc().TSFlags;
5510 
5511   if (TSFlags & RISCVII::HasSEWOpMask) {
5512     unsigned NumOperands = MI.getNumExplicitOperands();
5513     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
5514     unsigned SEWIndex = NumOperands - 1;
5515     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
5516 
5517     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
5518                                                RISCVII::VLMulShift);
5519     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
5520   }
5521 
5522   switch (MI.getOpcode()) {
5523   default:
5524     llvm_unreachable("Unexpected instr type to insert");
5525   case RISCV::ReadCycleWide:
5526     assert(!Subtarget.is64Bit() &&
5527            "ReadCycleWrite is only to be used on riscv32");
5528     return emitReadCycleWidePseudo(MI, BB);
5529   case RISCV::Select_GPR_Using_CC_GPR:
5530   case RISCV::Select_FPR16_Using_CC_GPR:
5531   case RISCV::Select_FPR32_Using_CC_GPR:
5532   case RISCV::Select_FPR64_Using_CC_GPR:
5533     return emitSelectPseudo(MI, BB);
5534   case RISCV::BuildPairF64Pseudo:
5535     return emitBuildPairF64Pseudo(MI, BB);
5536   case RISCV::SplitF64Pseudo:
5537     return emitSplitF64Pseudo(MI, BB);
5538   }
5539 }
5540 
5541 // Calling Convention Implementation.
5542 // The expectations for frontend ABI lowering vary from target to target.
5543 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
5544 // details, but this is a longer term goal. For now, we simply try to keep the
5545 // role of the frontend as simple and well-defined as possible. The rules can
5546 // be summarised as:
5547 // * Never split up large scalar arguments. We handle them here.
5548 // * If a hardfloat calling convention is being used, and the struct may be
5549 // passed in a pair of registers (fp+fp, int+fp), and both registers are
5550 // available, then pass as two separate arguments. If either the GPRs or FPRs
5551 // are exhausted, then pass according to the rule below.
5552 // * If a struct could never be passed in registers or directly in a stack
5553 // slot (as it is larger than 2*XLEN and the floating point rules don't
5554 // apply), then pass it using a pointer with the byval attribute.
// * If a struct is no larger than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
5557 // * The frontend can determine whether a struct is returned by reference or
5558 // not based on its size and fields. If it will be returned by reference, the
5559 // frontend must modify the prototype so a pointer with the sret annotation is
5560 // passed as the first argument. This is not necessary for large scalar
5561 // returns.
5562 // * Struct return values and varargs should be coerced to structs containing
5563 // register-size fields in the same situations they would be for fixed
5564 // arguments.
5565 
5566 static const MCPhysReg ArgGPRs[] = {
5567   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
5568   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
5569 };
5570 static const MCPhysReg ArgFPR16s[] = {
5571   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
5572   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
5573 };
5574 static const MCPhysReg ArgFPR32s[] = {
5575   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
5576   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
5577 };
5578 static const MCPhysReg ArgFPR64s[] = {
5579   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
5580   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
5581 };
5582 // This is an interim calling convention and it may be changed in the future.
5583 static const MCPhysReg ArgVRs[] = {
5584     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
5585     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
5586     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
5587 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
5588                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
5589                                      RISCV::V20M2, RISCV::V22M2};
5590 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
5591                                      RISCV::V20M4};
5592 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
5593 
5594 // Pass a 2*XLEN argument that has been split into two XLEN values through
5595 // registers or the stack as necessary.
5596 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
5597                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
5598                                 MVT ValVT2, MVT LocVT2,
5599                                 ISD::ArgFlagsTy ArgFlags2) {
5600   unsigned XLenInBytes = XLen / 8;
5601   if (Register Reg = State.AllocateReg(ArgGPRs)) {
5602     // At least one half can be passed via register.
5603     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
5604                                      VA1.getLocVT(), CCValAssign::Full));
5605   } else {
5606     // Both halves must be passed on the stack, with proper alignment.
5607     Align StackAlign =
5608         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
5609     State.addLoc(
5610         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
5611                             State.AllocateStack(XLenInBytes, StackAlign),
5612                             VA1.getLocVT(), CCValAssign::Full));
5613     State.addLoc(CCValAssign::getMem(
5614         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
5615         LocVT2, CCValAssign::Full));
5616     return false;
5617   }
5618 
5619   if (Register Reg = State.AllocateReg(ArgGPRs)) {
5620     // The second half can also be passed via register.
5621     State.addLoc(
5622         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
5623   } else {
5624     // The second half is passed via the stack, without additional alignment.
5625     State.addLoc(CCValAssign::getMem(
5626         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
5627         LocVT2, CCValAssign::Full));
5628   }
5629 
5630   return false;
5631 }
5632 
5633 // Implements the RISC-V calling convention. Returns true upon failure.
5634 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
5635                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
5636                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
5637                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
5638                      Optional<unsigned> FirstMaskArgument) {
5639   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
5640   assert(XLen == 32 || XLen == 64);
5641   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
5642 
5643   // Any return value split in to more than two values can't be returned
5644   // directly. Vectors are returned via the available vector registers.
5645   if (!LocVT.isVector() && IsRet && ValNo > 1)
5646     return true;
5647 
5648   // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
5649   // variadic argument, or if no F16/F32 argument registers are available.
5650   bool UseGPRForF16_F32 = true;
5651   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
5652   // variadic argument, or if no F64 argument registers are available.
5653   bool UseGPRForF64 = true;
5654 
5655   switch (ABI) {
5656   default:
5657     llvm_unreachable("Unexpected ABI");
5658   case RISCVABI::ABI_ILP32:
5659   case RISCVABI::ABI_LP64:
5660     break;
5661   case RISCVABI::ABI_ILP32F:
5662   case RISCVABI::ABI_LP64F:
5663     UseGPRForF16_F32 = !IsFixed;
5664     break;
5665   case RISCVABI::ABI_ILP32D:
5666   case RISCVABI::ABI_LP64D:
5667     UseGPRForF16_F32 = !IsFixed;
5668     UseGPRForF64 = !IsFixed;
5669     break;
5670   }
5671 
5672   // FPR16, FPR32, and FPR64 alias each other.
5673   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
5674     UseGPRForF16_F32 = true;
5675     UseGPRForF64 = true;
5676   }
5677 
5678   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
5679   // similar local variables rather than directly checking against the target
5680   // ABI.
5681 
5682   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
5683     LocVT = XLenVT;
5684     LocInfo = CCValAssign::BCvt;
5685   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
5686     LocVT = MVT::i64;
5687     LocInfo = CCValAssign::BCvt;
5688   }
5689 
5690   // If this is a variadic argument, the RISC-V calling convention requires
5691   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
5692   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
5693   // be used regardless of whether the original argument was split during
5694   // legalisation or not. The argument will not be passed by registers if the
5695   // original type is larger than 2*XLEN, so the register alignment rule does
5696   // not apply.
5697   unsigned TwoXLenInBytes = (2 * XLen) / 8;
5698   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
5699       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
5700     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
5701     // Skip 'odd' register if necessary.
5702     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
5703       State.AllocateReg(ArgGPRs);
5704   }
5705 
5706   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
5707   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
5708       State.getPendingArgFlags();
5709 
5710   assert(PendingLocs.size() == PendingArgFlags.size() &&
5711          "PendingLocs and PendingArgFlags out of sync");
5712 
5713   // Handle passing f64 on RV32D with a soft float ABI or when floating point
5714   // registers are exhausted.
5715   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
5716     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
5717            "Can't lower f64 if it is split");
5718     // Depending on available argument GPRS, f64 may be passed in a pair of
5719     // GPRs, split between a GPR and the stack, or passed completely on the
5720     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
5721     // cases.
5722     Register Reg = State.AllocateReg(ArgGPRs);
5723     LocVT = MVT::i32;
5724     if (!Reg) {
5725       unsigned StackOffset = State.AllocateStack(8, Align(8));
5726       State.addLoc(
5727           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5728       return false;
5729     }
5730     if (!State.AllocateReg(ArgGPRs))
5731       State.AllocateStack(4, Align(4));
5732     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5733     return false;
5734   }
5735 
5736   // Fixed-length vectors are located in the corresponding scalable-vector
5737   // container types.
5738   if (ValVT.isFixedLengthVector())
5739     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
5740 
5741   // Split arguments might be passed indirectly, so keep track of the pending
5742   // values. Split vectors are passed via a mix of registers and indirectly, so
5743   // treat them as we would any other argument.
5744   if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
5745     LocVT = XLenVT;
5746     LocInfo = CCValAssign::Indirect;
5747     PendingLocs.push_back(
5748         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
5749     PendingArgFlags.push_back(ArgFlags);
5750     if (!ArgFlags.isSplitEnd()) {
5751       return false;
5752     }
5753   }
5754 
5755   // If the split argument only had two elements, it should be passed directly
5756   // in registers or on the stack.
5757   if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
5758     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
5759     // Apply the normal calling convention rules to the first half of the
5760     // split argument.
5761     CCValAssign VA = PendingLocs[0];
5762     ISD::ArgFlagsTy AF = PendingArgFlags[0];
5763     PendingLocs.clear();
5764     PendingArgFlags.clear();
5765     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
5766                                ArgFlags);
5767   }
5768 
5769   // Allocate to a register if possible, or else a stack slot.
5770   Register Reg;
5771   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
5772     Reg = State.AllocateReg(ArgFPR16s);
5773   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
5774     Reg = State.AllocateReg(ArgFPR32s);
5775   else if (ValVT == MVT::f64 && !UseGPRForF64)
5776     Reg = State.AllocateReg(ArgFPR64s);
5777   else if (ValVT.isVector()) {
5778     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
5779     if (RC == &RISCV::VRRegClass) {
5780       // Assign the first mask argument to V0.
5781       // This is an interim calling convention and it may be changed in the
5782       // future.
5783       if (FirstMaskArgument.hasValue() &&
5784           ValNo == FirstMaskArgument.getValue()) {
5785         Reg = State.AllocateReg(RISCV::V0);
5786       } else {
5787         Reg = State.AllocateReg(ArgVRs);
5788       }
5789     } else if (RC == &RISCV::VRM2RegClass) {
5790       Reg = State.AllocateReg(ArgVRM2s);
5791     } else if (RC == &RISCV::VRM4RegClass) {
5792       Reg = State.AllocateReg(ArgVRM4s);
5793     } else if (RC == &RISCV::VRM8RegClass) {
5794       Reg = State.AllocateReg(ArgVRM8s);
5795     } else {
5796       llvm_unreachable("Unhandled class register for ValueType");
5797     }
5798     if (!Reg) {
5799       // For return values, the vector must be passed fully via registers or
5800       // via the stack.
5801       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
5802       // but we're using all of them.
5803       if (IsRet)
5804         return true;
5805       LocInfo = CCValAssign::Indirect;
5806       // Try using a GPR to pass the address
5807       Reg = State.AllocateReg(ArgGPRs);
5808       LocVT = XLenVT;
5809     }
5810   } else
5811     Reg = State.AllocateReg(ArgGPRs);
5812   unsigned StackOffset =
5813       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
5814 
5815   // If we reach this point and PendingLocs is non-empty, we must be at the
5816   // end of a split argument that must be passed indirectly.
5817   if (!PendingLocs.empty()) {
5818     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
5819     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
5820 
5821     for (auto &It : PendingLocs) {
5822       if (Reg)
5823         It.convertToReg(Reg);
5824       else
5825         It.convertToMem(StackOffset);
5826       State.addLoc(It);
5827     }
5828     PendingLocs.clear();
5829     PendingArgFlags.clear();
5830     return false;
5831   }
5832 
5833   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
5834           (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
5835          "Expected an XLenVT or vector types at this stage");
5836 
5837   if (Reg) {
5838     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5839     return false;
5840   }
5841 
5842   // When a floating-point value is passed on the stack, no bit-conversion is
5843   // needed.
5844   if (ValVT.isFloatingPoint()) {
5845     LocVT = ValVT;
5846     LocInfo = CCValAssign::Full;
5847   }
5848   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5849   return false;
5850 }
5851 
5852 template <typename ArgTy>
5853 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
5854   for (const auto &ArgIdx : enumerate(Args)) {
5855     MVT ArgVT = ArgIdx.value().VT;
5856     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
5857       return ArgIdx.index();
5858   }
5859   return None;
5860 }
5861 
5862 void RISCVTargetLowering::analyzeInputArgs(
5863     MachineFunction &MF, CCState &CCInfo,
5864     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
5865   unsigned NumArgs = Ins.size();
5866   FunctionType *FType = MF.getFunction().getFunctionType();
5867 
5868   Optional<unsigned> FirstMaskArgument;
5869   if (Subtarget.hasStdExtV())
5870     FirstMaskArgument = preAssignMask(Ins);
5871 
5872   for (unsigned i = 0; i != NumArgs; ++i) {
5873     MVT ArgVT = Ins[i].VT;
5874     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
5875 
5876     Type *ArgTy = nullptr;
5877     if (IsRet)
5878       ArgTy = FType->getReturnType();
5879     else if (Ins[i].isOrigArg())
5880       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5881 
5882     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5883     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
5884                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
5885                  FirstMaskArgument)) {
5886       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
5887                         << EVT(ArgVT).getEVTString() << '\n');
5888       llvm_unreachable(nullptr);
5889     }
5890   }
5891 }
5892 
5893 void RISCVTargetLowering::analyzeOutputArgs(
5894     MachineFunction &MF, CCState &CCInfo,
5895     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5896     CallLoweringInfo *CLI) const {
5897   unsigned NumArgs = Outs.size();
5898 
5899   Optional<unsigned> FirstMaskArgument;
5900   if (Subtarget.hasStdExtV())
5901     FirstMaskArgument = preAssignMask(Outs);
5902 
5903   for (unsigned i = 0; i != NumArgs; i++) {
5904     MVT ArgVT = Outs[i].VT;
5905     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5906     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5907 
5908     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5909     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
5910                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
5911                  FirstMaskArgument)) {
5912       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
5913                         << EVT(ArgVT).getEVTString() << "\n");
5914       llvm_unreachable(nullptr);
5915     }
5916   }
5917 }
5918 
5919 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5920 // values.
5921 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5922                                    const CCValAssign &VA, const SDLoc &DL,
5923                                    const RISCVSubtarget &Subtarget) {
5924   switch (VA.getLocInfo()) {
5925   default:
5926     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5927   case CCValAssign::Full:
5928     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
5929       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
5930     break;
5931   case CCValAssign::BCvt:
5932     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
5933       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
5934     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5935       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
5936     else
5937       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5938     break;
5939   }
5940   return Val;
5941 }
5942 
5943 // The caller is responsible for loading the full value if the argument is
5944 // passed with CCValAssign::Indirect.
5945 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5946                                 const CCValAssign &VA, const SDLoc &DL,
5947                                 const RISCVTargetLowering &TLI) {
5948   MachineFunction &MF = DAG.getMachineFunction();
5949   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5950   EVT LocVT = VA.getLocVT();
5951   SDValue Val;
5952   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5953   Register VReg = RegInfo.createVirtualRegister(RC);
5954   RegInfo.addLiveIn(VA.getLocReg(), VReg);
5955   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5956 
5957   if (VA.getLocInfo() == CCValAssign::Indirect)
5958     return Val;
5959 
5960   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
5961 }
5962 
5963 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5964                                    const CCValAssign &VA, const SDLoc &DL,
5965                                    const RISCVSubtarget &Subtarget) {
5966   EVT LocVT = VA.getLocVT();
5967 
5968   switch (VA.getLocInfo()) {
5969   default:
5970     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5971   case CCValAssign::Full:
5972     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
5973       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
5974     break;
5975   case CCValAssign::BCvt:
5976     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
5977       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
5978     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5979       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
5980     else
5981       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5982     break;
5983   }
5984   return Val;
5985 }
5986 
5987 // The caller is responsible for loading the full value if the argument is
5988 // passed with CCValAssign::Indirect.
5989 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5990                                 const CCValAssign &VA, const SDLoc &DL) {
5991   MachineFunction &MF = DAG.getMachineFunction();
5992   MachineFrameInfo &MFI = MF.getFrameInfo();
5993   EVT LocVT = VA.getLocVT();
5994   EVT ValVT = VA.getValVT();
5995   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
5996   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
5997                                  VA.getLocMemOffset(), /*Immutable=*/true);
5998   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
5999   SDValue Val;
6000 
6001   ISD::LoadExtType ExtType;
6002   switch (VA.getLocInfo()) {
6003   default:
6004     llvm_unreachable("Unexpected CCValAssign::LocInfo");
6005   case CCValAssign::Full:
6006   case CCValAssign::Indirect:
6007   case CCValAssign::BCvt:
6008     ExtType = ISD::NON_EXTLOAD;
6009     break;
6010   }
6011   Val = DAG.getExtLoad(
6012       ExtType, DL, LocVT, Chain, FIN,
6013       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
6014   return Val;
6015 }
6016 
6017 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
6018                                        const CCValAssign &VA, const SDLoc &DL) {
6019   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
6020          "Unexpected VA");
6021   MachineFunction &MF = DAG.getMachineFunction();
6022   MachineFrameInfo &MFI = MF.getFrameInfo();
6023   MachineRegisterInfo &RegInfo = MF.getRegInfo();
6024 
6025   if (VA.isMemLoc()) {
6026     // f64 is passed on the stack.
6027     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
6028     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
6029     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
6030                        MachinePointerInfo::getFixedStack(MF, FI));
6031   }
6032 
6033   assert(VA.isRegLoc() && "Expected register VA assignment");
6034 
6035   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6036   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
6037   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
6038   SDValue Hi;
6039   if (VA.getLocReg() == RISCV::X17) {
6040     // Second half of f64 is passed on the stack.
6041     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
6042     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
6043     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
6044                      MachinePointerInfo::getFixedStack(MF, FI));
6045   } else {
6046     // Second half of f64 is passed in another GPR.
6047     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
6048     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
6049     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
6050   }
6051   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6052 }
6053 
6054 // FastCC has less than 1% performance improvement for some particular
6055 // benchmark. But theoretically, it may has benenfit for some cases.
6056 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
6057                             CCValAssign::LocInfo LocInfo,
6058                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
6059 
6060   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
6061     // X5 and X6 might be used for save-restore libcall.
6062     static const MCPhysReg GPRList[] = {
6063         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
6064         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
6065         RISCV::X29, RISCV::X30, RISCV::X31};
6066     if (unsigned Reg = State.AllocateReg(GPRList)) {
6067       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6068       return false;
6069     }
6070   }
6071 
6072   if (LocVT == MVT::f16) {
6073     static const MCPhysReg FPR16List[] = {
6074         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
6075         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
6076         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
6077         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
6078     if (unsigned Reg = State.AllocateReg(FPR16List)) {
6079       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6080       return false;
6081     }
6082   }
6083 
6084   if (LocVT == MVT::f32) {
6085     static const MCPhysReg FPR32List[] = {
6086         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
6087         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
6088         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
6089         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
6090     if (unsigned Reg = State.AllocateReg(FPR32List)) {
6091       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6092       return false;
6093     }
6094   }
6095 
6096   if (LocVT == MVT::f64) {
6097     static const MCPhysReg FPR64List[] = {
6098         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
6099         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
6100         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
6101         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
6102     if (unsigned Reg = State.AllocateReg(FPR64List)) {
6103       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6104       return false;
6105     }
6106   }
6107 
6108   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
6109     unsigned Offset4 = State.AllocateStack(4, Align(4));
6110     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
6111     return false;
6112   }
6113 
6114   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
6115     unsigned Offset5 = State.AllocateStack(8, Align(8));
6116     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
6117     return false;
6118   }
6119 
6120   return true; // CC didn't match.
6121 }
6122 
6123 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
6124                          CCValAssign::LocInfo LocInfo,
6125                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
6126 
6127   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
6128     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
6129     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
6130     static const MCPhysReg GPRList[] = {
6131         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
6132         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
6133     if (unsigned Reg = State.AllocateReg(GPRList)) {
6134       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6135       return false;
6136     }
6137   }
6138 
6139   if (LocVT == MVT::f32) {
6140     // Pass in STG registers: F1, ..., F6
6141     //                        fs0 ... fs5
6142     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
6143                                           RISCV::F18_F, RISCV::F19_F,
6144                                           RISCV::F20_F, RISCV::F21_F};
6145     if (unsigned Reg = State.AllocateReg(FPR32List)) {
6146       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6147       return false;
6148     }
6149   }
6150 
6151   if (LocVT == MVT::f64) {
6152     // Pass in STG registers: D1, ..., D6
6153     //                        fs6 ... fs11
6154     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
6155                                           RISCV::F24_D, RISCV::F25_D,
6156                                           RISCV::F26_D, RISCV::F27_D};
6157     if (unsigned Reg = State.AllocateReg(FPR64List)) {
6158       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6159       return false;
6160     }
6161   }
6162 
6163   report_fatal_error("No registers left in GHC calling convention");
6164   return true;
6165 }
6166 
6167 // Transform physical registers into virtual registers.
6168 SDValue RISCVTargetLowering::LowerFormalArguments(
6169     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
6170     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
6171     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6172 
6173   MachineFunction &MF = DAG.getMachineFunction();
6174 
6175   switch (CallConv) {
6176   default:
6177     report_fatal_error("Unsupported calling convention");
6178   case CallingConv::C:
6179   case CallingConv::Fast:
6180     break;
6181   case CallingConv::GHC:
6182     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
6183         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
6184       report_fatal_error(
6185         "GHC calling convention requires the F and D instruction set extensions");
6186   }
6187 
6188   const Function &Func = MF.getFunction();
6189   if (Func.hasFnAttribute("interrupt")) {
6190     if (!Func.arg_empty())
6191       report_fatal_error(
6192         "Functions with the interrupt attribute cannot have arguments!");
6193 
6194     StringRef Kind =
6195       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
6196 
6197     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
6198       report_fatal_error(
6199         "Function interrupt attribute argument not supported!");
6200   }
6201 
6202   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6203   MVT XLenVT = Subtarget.getXLenVT();
6204   unsigned XLenInBytes = Subtarget.getXLen() / 8;
6205   // Used with vargs to acumulate store chains.
6206   std::vector<SDValue> OutChains;
6207 
6208   // Assign locations to all of the incoming arguments.
6209   SmallVector<CCValAssign, 16> ArgLocs;
6210   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6211 
6212   if (CallConv == CallingConv::Fast)
6213     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
6214   else if (CallConv == CallingConv::GHC)
6215     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
6216   else
6217     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
6218 
6219   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
6220     CCValAssign &VA = ArgLocs[i];
6221     SDValue ArgValue;
6222     // Passing f64 on RV32D with a soft float ABI must be handled as a special
6223     // case.
6224     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
6225       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
6226     else if (VA.isRegLoc())
6227       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
6228     else
6229       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
6230 
6231     if (VA.getLocInfo() == CCValAssign::Indirect) {
6232       // If the original argument was split and passed by reference (e.g. i128
6233       // on RV32), we need to load all parts of it here (using the same
6234       // address). Vectors may be partly split to registers and partly to the
6235       // stack, in which case the base address is partly offset and subsequent
6236       // stores are relative to that.
6237       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
6238                                    MachinePointerInfo()));
6239       unsigned ArgIndex = Ins[i].OrigArgIndex;
6240       unsigned ArgPartOffset = Ins[i].PartOffset;
6241       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
6242       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
6243         CCValAssign &PartVA = ArgLocs[i + 1];
6244         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
6245         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
6246                                       DAG.getIntPtrConstant(PartOffset, DL));
6247         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
6248                                      MachinePointerInfo()));
6249         ++i;
6250       }
6251       continue;
6252     }
6253     InVals.push_back(ArgValue);
6254   }
6255 
6256   if (IsVarArg) {
6257     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
6258     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
6259     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
6260     MachineFrameInfo &MFI = MF.getFrameInfo();
6261     MachineRegisterInfo &RegInfo = MF.getRegInfo();
6262     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
6263 
6264     // Offset of the first variable argument from stack pointer, and size of
6265     // the vararg save area. For now, the varargs save area is either zero or
6266     // large enough to hold a0-a7.
6267     int VaArgOffset, VarArgsSaveSize;
6268 
6269     // If all registers are allocated, then all varargs must be passed on the
6270     // stack and we don't need to save any argregs.
6271     if (ArgRegs.size() == Idx) {
6272       VaArgOffset = CCInfo.getNextStackOffset();
6273       VarArgsSaveSize = 0;
6274     } else {
6275       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
6276       VaArgOffset = -VarArgsSaveSize;
6277     }
6278 
6279     // Record the frame index of the first variable argument
6280     // which is a value necessary to VASTART.
6281     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6282     RVFI->setVarArgsFrameIndex(FI);
6283 
6284     // If saving an odd number of registers then create an extra stack slot to
6285     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
6286     // offsets to even-numbered registered remain 2*XLEN-aligned.
6287     if (Idx % 2) {
6288       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
6289       VarArgsSaveSize += XLenInBytes;
6290     }
6291 
6292     // Copy the integer registers that may have been used for passing varargs
6293     // to the vararg save area.
6294     for (unsigned I = Idx; I < ArgRegs.size();
6295          ++I, VaArgOffset += XLenInBytes) {
6296       const Register Reg = RegInfo.createVirtualRegister(RC);
6297       RegInfo.addLiveIn(ArgRegs[I], Reg);
6298       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
6299       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
6300       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
6301       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
6302                                    MachinePointerInfo::getFixedStack(MF, FI));
6303       cast<StoreSDNode>(Store.getNode())
6304           ->getMemOperand()
6305           ->setValue((Value *)nullptr);
6306       OutChains.push_back(Store);
6307     }
6308     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
6309   }
6310 
6311   // All stores are grouped in one node to allow the matching between
6312   // the size of Ins and InVals. This only happens for vararg functions.
6313   if (!OutChains.empty()) {
6314     OutChains.push_back(Chain);
6315     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
6316   }
6317 
6318   return Chain;
6319 }
6320 
6321 /// isEligibleForTailCallOptimization - Check whether the call is eligible
6322 /// for tail call optimization.
6323 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
6324 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
6325     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
6326     const SmallVector<CCValAssign, 16> &ArgLocs) const {
6327 
6328   auto &Callee = CLI.Callee;
6329   auto CalleeCC = CLI.CallConv;
6330   auto &Outs = CLI.Outs;
6331   auto &Caller = MF.getFunction();
6332   auto CallerCC = Caller.getCallingConv();
6333 
6334   // Exception-handling functions need a special set of instructions to
6335   // indicate a return to the hardware. Tail-calling another function would
6336   // probably break this.
6337   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
6338   // should be expanded as new function attributes are introduced.
6339   if (Caller.hasFnAttribute("interrupt"))
6340     return false;
6341 
6342   // Do not tail call opt if the stack is used to pass parameters.
6343   if (CCInfo.getNextStackOffset() != 0)
6344     return false;
6345 
6346   // Do not tail call opt if any parameters need to be passed indirectly.
6347   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
6348   // passed indirectly. So the address of the value will be passed in a
6349   // register, or if not available, then the address is put on the stack. In
6350   // order to pass indirectly, space on the stack often needs to be allocated
6351   // in order to store the value. In this case the CCInfo.getNextStackOffset()
6352   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
6353   // are passed CCValAssign::Indirect.
6354   for (auto &VA : ArgLocs)
6355     if (VA.getLocInfo() == CCValAssign::Indirect)
6356       return false;
6357 
6358   // Do not tail call opt if either caller or callee uses struct return
6359   // semantics.
6360   auto IsCallerStructRet = Caller.hasStructRetAttr();
6361   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
6362   if (IsCallerStructRet || IsCalleeStructRet)
6363     return false;
6364 
6365   // Externally-defined functions with weak linkage should not be
6366   // tail-called. The behaviour of branch instructions in this situation (as
6367   // used for tail calls) is implementation-defined, so we cannot rely on the
6368   // linker replacing the tail call with a return.
6369   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6370     const GlobalValue *GV = G->getGlobal();
6371     if (GV->hasExternalWeakLinkage())
6372       return false;
6373   }
6374 
6375   // The callee has to preserve all registers the caller needs to preserve.
6376   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
6377   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
6378   if (CalleeCC != CallerCC) {
6379     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
6380     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
6381       return false;
6382   }
6383 
6384   // Byval parameters hand the function a pointer directly into the stack area
6385   // we want to reuse during a tail call. Working around this *is* possible
6386   // but less efficient and uglier in LowerCall.
6387   for (auto &Arg : Outs)
6388     if (Arg.Flags.isByVal())
6389       return false;
6390 
6391   return true;
6392 }
6393 
6394 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
6395 // and output parameter nodes.
6396 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
6397                                        SmallVectorImpl<SDValue> &InVals) const {
6398   SelectionDAG &DAG = CLI.DAG;
6399   SDLoc &DL = CLI.DL;
6400   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
6401   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
6402   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
6403   SDValue Chain = CLI.Chain;
6404   SDValue Callee = CLI.Callee;
6405   bool &IsTailCall = CLI.IsTailCall;
6406   CallingConv::ID CallConv = CLI.CallConv;
6407   bool IsVarArg = CLI.IsVarArg;
6408   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6409   MVT XLenVT = Subtarget.getXLenVT();
6410 
6411   MachineFunction &MF = DAG.getMachineFunction();
6412 
6413   // Analyze the operands of the call, assigning locations to each operand.
6414   SmallVector<CCValAssign, 16> ArgLocs;
6415   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6416 
6417   if (CallConv == CallingConv::Fast)
6418     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
6419   else if (CallConv == CallingConv::GHC)
6420     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
6421   else
6422     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
6423 
6424   // Check if it's really possible to do a tail call.
6425   if (IsTailCall)
6426     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
6427 
6428   if (IsTailCall)
6429     ++NumTailCalls;
6430   else if (CLI.CB && CLI.CB->isMustTailCall())
6431     report_fatal_error("failed to perform tail call elimination on a call "
6432                        "site marked musttail");
6433 
6434   // Get a count of how many bytes are to be pushed on the stack.
6435   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
6436 
6437   // Create local copies for byval args
6438   SmallVector<SDValue, 8> ByValArgs;
6439   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6440     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6441     if (!Flags.isByVal())
6442       continue;
6443 
6444     SDValue Arg = OutVals[i];
6445     unsigned Size = Flags.getByValSize();
6446     Align Alignment = Flags.getNonZeroByValAlign();
6447 
6448     int FI =
6449         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
6450     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
6451     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
6452 
6453     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
6454                           /*IsVolatile=*/false,
6455                           /*AlwaysInline=*/false, IsTailCall,
6456                           MachinePointerInfo(), MachinePointerInfo());
6457     ByValArgs.push_back(FIPtr);
6458   }
6459 
6460   if (!IsTailCall)
6461     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
6462 
6463   // Copy argument values to their designated locations.
6464   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
6465   SmallVector<SDValue, 8> MemOpChains;
6466   SDValue StackPtr;
6467   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
6468     CCValAssign &VA = ArgLocs[i];
6469     SDValue ArgValue = OutVals[i];
6470     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6471 
6472     // Handle passing f64 on RV32D with a soft float ABI as a special case.
6473     bool IsF64OnRV32DSoftABI =
6474         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
6475     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
6476       SDValue SplitF64 = DAG.getNode(
6477           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
6478       SDValue Lo = SplitF64.getValue(0);
6479       SDValue Hi = SplitF64.getValue(1);
6480 
6481       Register RegLo = VA.getLocReg();
6482       RegsToPass.push_back(std::make_pair(RegLo, Lo));
6483 
6484       if (RegLo == RISCV::X17) {
6485         // Second half of f64 is passed on the stack.
6486         // Work out the address of the stack slot.
6487         if (!StackPtr.getNode())
6488           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
6489         // Emit the store.
6490         MemOpChains.push_back(
6491             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
6492       } else {
6493         // Second half of f64 is passed in another GPR.
6494         assert(RegLo < RISCV::X31 && "Invalid register pair");
6495         Register RegHigh = RegLo + 1;
6496         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
6497       }
6498       continue;
6499     }
6500 
6501     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
6502     // as any other MemLoc.
6503 
6504     // Promote the value if needed.
6505     // For now, only handle fully promoted and indirect arguments.
6506     if (VA.getLocInfo() == CCValAssign::Indirect) {
6507       // Store the argument in a stack slot and pass its address.
6508       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
6509       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
6510       MemOpChains.push_back(
6511           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
6512                        MachinePointerInfo::getFixedStack(MF, FI)));
6513       // If the original argument was split (e.g. i128), we need
6514       // to store the required parts of it here (and pass just one address).
6515       // Vectors may be partly split to registers and partly to the stack, in
6516       // which case the base address is partly offset and subsequent stores are
6517       // relative to that.
6518       unsigned ArgIndex = Outs[i].OrigArgIndex;
6519       unsigned ArgPartOffset = Outs[i].PartOffset;
6520       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
6521       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
6522         SDValue PartValue = OutVals[i + 1];
6523         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
6524         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
6525                                       DAG.getIntPtrConstant(PartOffset, DL));
6526         MemOpChains.push_back(
6527             DAG.getStore(Chain, DL, PartValue, Address,
6528                          MachinePointerInfo::getFixedStack(MF, FI)));
6529         ++i;
6530       }
6531       ArgValue = SpillSlot;
6532     } else {
6533       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
6534     }
6535 
6536     // Use local copy if it is a byval arg.
6537     if (Flags.isByVal())
6538       ArgValue = ByValArgs[j++];
6539 
6540     if (VA.isRegLoc()) {
6541       // Queue up the argument copies and emit them at the end.
6542       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
6543     } else {
6544       assert(VA.isMemLoc() && "Argument not register or memory");
6545       assert(!IsTailCall && "Tail call not allowed if stack is used "
6546                             "for passing parameters");
6547 
6548       // Work out the address of the stack slot.
6549       if (!StackPtr.getNode())
6550         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
6551       SDValue Address =
6552           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
6553                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
6554 
6555       // Emit the store.
6556       MemOpChains.push_back(
6557           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
6558     }
6559   }
6560 
6561   // Join the stores, which are independent of one another.
6562   if (!MemOpChains.empty())
6563     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6564 
6565   SDValue Glue;
6566 
6567   // Build a sequence of copy-to-reg nodes, chained and glued together.
6568   for (auto &Reg : RegsToPass) {
6569     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
6570     Glue = Chain.getValue(1);
6571   }
6572 
6573   // Validate that none of the argument registers have been marked as
6574   // reserved, if so report an error. Do the same for the return address if this
6575   // is not a tailcall.
6576   validateCCReservedRegs(RegsToPass, MF);
6577   if (!IsTailCall &&
6578       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
6579     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
6580         MF.getFunction(),
6581         "Return address register required, but has been reserved."});
6582 
6583   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
6584   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
6585   // split it and then direct call can be matched by PseudoCALL.
6586   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
6587     const GlobalValue *GV = S->getGlobal();
6588 
6589     unsigned OpFlags = RISCVII::MO_CALL;
6590     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
6591       OpFlags = RISCVII::MO_PLT;
6592 
6593     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6594   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6595     unsigned OpFlags = RISCVII::MO_CALL;
6596 
6597     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
6598                                                  nullptr))
6599       OpFlags = RISCVII::MO_PLT;
6600 
6601     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
6602   }
6603 
6604   // The first call operand is the chain and the second is the target address.
6605   SmallVector<SDValue, 8> Ops;
6606   Ops.push_back(Chain);
6607   Ops.push_back(Callee);
6608 
6609   // Add argument registers to the end of the list so that they are
6610   // known live into the call.
6611   for (auto &Reg : RegsToPass)
6612     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
6613 
6614   if (!IsTailCall) {
6615     // Add a register mask operand representing the call-preserved registers.
6616     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
6617     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
6618     assert(Mask && "Missing call preserved mask for calling convention");
6619     Ops.push_back(DAG.getRegisterMask(Mask));
6620   }
6621 
6622   // Glue the call to the argument copies, if any.
6623   if (Glue.getNode())
6624     Ops.push_back(Glue);
6625 
6626   // Emit the call.
6627   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6628 
6629   if (IsTailCall) {
6630     MF.getFrameInfo().setHasTailCall();
6631     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
6632   }
6633 
6634   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
6635   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6636   Glue = Chain.getValue(1);
6637 
6638   // Mark the end of the call, which is glued to the call itself.
6639   Chain = DAG.getCALLSEQ_END(Chain,
6640                              DAG.getConstant(NumBytes, DL, PtrVT, true),
6641                              DAG.getConstant(0, DL, PtrVT, true),
6642                              Glue, DL);
6643   Glue = Chain.getValue(1);
6644 
6645   // Assign locations to each value returned by this call.
6646   SmallVector<CCValAssign, 16> RVLocs;
6647   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
6648   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
6649 
6650   // Copy all of the result registers out of their specified physreg.
6651   for (auto &VA : RVLocs) {
6652     // Copy the value out
6653     SDValue RetValue =
6654         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
6655     // Glue the RetValue to the end of the call sequence
6656     Chain = RetValue.getValue(1);
6657     Glue = RetValue.getValue(2);
6658 
6659     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
6660       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
6661       SDValue RetValue2 =
6662           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
6663       Chain = RetValue2.getValue(1);
6664       Glue = RetValue2.getValue(2);
6665       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
6666                              RetValue2);
6667     }
6668 
6669     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
6670 
6671     InVals.push_back(RetValue);
6672   }
6673 
6674   return Chain;
6675 }
6676 
6677 bool RISCVTargetLowering::CanLowerReturn(
6678     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
6679     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6680   SmallVector<CCValAssign, 16> RVLocs;
6681   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
6682 
6683   Optional<unsigned> FirstMaskArgument;
6684   if (Subtarget.hasStdExtV())
6685     FirstMaskArgument = preAssignMask(Outs);
6686 
6687   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6688     MVT VT = Outs[i].VT;
6689     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6690     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
6691     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
6692                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
6693                  *this, FirstMaskArgument))
6694       return false;
6695   }
6696   return true;
6697 }
6698 
6699 SDValue
6700 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6701                                  bool IsVarArg,
6702                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
6703                                  const SmallVectorImpl<SDValue> &OutVals,
6704                                  const SDLoc &DL, SelectionDAG &DAG) const {
6705   const MachineFunction &MF = DAG.getMachineFunction();
6706   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
6707 
6708   // Stores the assignment of the return value to a location.
6709   SmallVector<CCValAssign, 16> RVLocs;
6710 
6711   // Info about the registers and stack slot.
6712   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
6713                  *DAG.getContext());
6714 
6715   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
6716                     nullptr);
6717 
6718   if (CallConv == CallingConv::GHC && !RVLocs.empty())
6719     report_fatal_error("GHC functions return void only");
6720 
6721   SDValue Glue;
6722   SmallVector<SDValue, 4> RetOps(1, Chain);
6723 
6724   // Copy the result values into the output registers.
6725   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
6726     SDValue Val = OutVals[i];
6727     CCValAssign &VA = RVLocs[i];
6728     assert(VA.isRegLoc() && "Can only return in registers!");
6729 
6730     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
6731       // Handle returning f64 on RV32D with a soft float ABI.
6732       assert(VA.isRegLoc() && "Expected return via registers");
6733       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
6734                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
6735       SDValue Lo = SplitF64.getValue(0);
6736       SDValue Hi = SplitF64.getValue(1);
6737       Register RegLo = VA.getLocReg();
6738       assert(RegLo < RISCV::X31 && "Invalid register pair");
6739       Register RegHi = RegLo + 1;
6740 
6741       if (STI.isRegisterReservedByUser(RegLo) ||
6742           STI.isRegisterReservedByUser(RegHi))
6743         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
6744             MF.getFunction(),
6745             "Return value register required, but has been reserved."});
6746 
6747       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
6748       Glue = Chain.getValue(1);
6749       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
6750       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
6751       Glue = Chain.getValue(1);
6752       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
6753     } else {
6754       // Handle a 'normal' return.
6755       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
6756       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
6757 
6758       if (STI.isRegisterReservedByUser(VA.getLocReg()))
6759         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
6760             MF.getFunction(),
6761             "Return value register required, but has been reserved."});
6762 
6763       // Guarantee that all emitted copies are stuck together.
6764       Glue = Chain.getValue(1);
6765       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6766     }
6767   }
6768 
6769   RetOps[0] = Chain; // Update chain.
6770 
6771   // Add the glue node if we have it.
6772   if (Glue.getNode()) {
6773     RetOps.push_back(Glue);
6774   }
6775 
6776   // Interrupt service routines use different return instructions.
6777   const Function &Func = DAG.getMachineFunction().getFunction();
6778   if (Func.hasFnAttribute("interrupt")) {
6779     if (!Func.getReturnType()->isVoidTy())
6780       report_fatal_error(
6781           "Functions with the interrupt attribute must have void return type!");
6782 
6783     MachineFunction &MF = DAG.getMachineFunction();
6784     StringRef Kind =
6785       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
6786 
6787     unsigned RetOpc;
6788     if (Kind == "user")
6789       RetOpc = RISCVISD::URET_FLAG;
6790     else if (Kind == "supervisor")
6791       RetOpc = RISCVISD::SRET_FLAG;
6792     else
6793       RetOpc = RISCVISD::MRET_FLAG;
6794 
6795     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
6796   }
6797 
6798   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
6799 }
6800 
6801 void RISCVTargetLowering::validateCCReservedRegs(
6802     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
6803     MachineFunction &MF) const {
6804   const Function &F = MF.getFunction();
6805   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
6806 
6807   if (llvm::any_of(Regs, [&STI](auto Reg) {
6808         return STI.isRegisterReservedByUser(Reg.first);
6809       }))
6810     F.getContext().diagnose(DiagnosticInfoUnsupported{
6811         F, "Argument register required, but has been reserved."});
6812 }
6813 
6814 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
6815   return CI->isTailCall();
6816 }
6817 
6818 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
6819 #define NODE_NAME_CASE(NODE)                                                   \
6820   case RISCVISD::NODE:                                                         \
6821     return "RISCVISD::" #NODE;
6822   // clang-format off
6823   switch ((RISCVISD::NodeType)Opcode) {
6824   case RISCVISD::FIRST_NUMBER:
6825     break;
6826   NODE_NAME_CASE(RET_FLAG)
6827   NODE_NAME_CASE(URET_FLAG)
6828   NODE_NAME_CASE(SRET_FLAG)
6829   NODE_NAME_CASE(MRET_FLAG)
6830   NODE_NAME_CASE(CALL)
6831   NODE_NAME_CASE(SELECT_CC)
6832   NODE_NAME_CASE(BR_CC)
6833   NODE_NAME_CASE(BuildPairF64)
6834   NODE_NAME_CASE(SplitF64)
6835   NODE_NAME_CASE(TAIL)
6836   NODE_NAME_CASE(MULHSU)
6837   NODE_NAME_CASE(SLLW)
6838   NODE_NAME_CASE(SRAW)
6839   NODE_NAME_CASE(SRLW)
6840   NODE_NAME_CASE(DIVW)
6841   NODE_NAME_CASE(DIVUW)
6842   NODE_NAME_CASE(REMUW)
6843   NODE_NAME_CASE(ROLW)
6844   NODE_NAME_CASE(RORW)
6845   NODE_NAME_CASE(CLZW)
6846   NODE_NAME_CASE(CTZW)
6847   NODE_NAME_CASE(FSLW)
6848   NODE_NAME_CASE(FSRW)
6849   NODE_NAME_CASE(FSL)
6850   NODE_NAME_CASE(FSR)
6851   NODE_NAME_CASE(FMV_H_X)
6852   NODE_NAME_CASE(FMV_X_ANYEXTH)
6853   NODE_NAME_CASE(FMV_W_X_RV64)
6854   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
6855   NODE_NAME_CASE(READ_CYCLE_WIDE)
6856   NODE_NAME_CASE(GREVI)
6857   NODE_NAME_CASE(GREVIW)
6858   NODE_NAME_CASE(GORCI)
6859   NODE_NAME_CASE(GORCIW)
6860   NODE_NAME_CASE(SHFLI)
6861   NODE_NAME_CASE(VMV_V_X_VL)
6862   NODE_NAME_CASE(VFMV_V_F_VL)
6863   NODE_NAME_CASE(VMV_X_S)
6864   NODE_NAME_CASE(VMV_S_XF_VL)
6865   NODE_NAME_CASE(SPLAT_VECTOR_I64)
6866   NODE_NAME_CASE(READ_VLENB)
6867   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
6868   NODE_NAME_CASE(VLEFF)
6869   NODE_NAME_CASE(VLEFF_MASK)
6870   NODE_NAME_CASE(VSLIDEUP_VL)
6871   NODE_NAME_CASE(VSLIDE1UP_VL)
6872   NODE_NAME_CASE(VSLIDEDOWN_VL)
6873   NODE_NAME_CASE(VID_VL)
6874   NODE_NAME_CASE(VFNCVT_ROD_VL)
6875   NODE_NAME_CASE(VECREDUCE_ADD_VL)
6876   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
6877   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
6878   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
6879   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
6880   NODE_NAME_CASE(VECREDUCE_AND_VL)
6881   NODE_NAME_CASE(VECREDUCE_OR_VL)
6882   NODE_NAME_CASE(VECREDUCE_XOR_VL)
6883   NODE_NAME_CASE(VECREDUCE_FADD_VL)
6884   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
6885   NODE_NAME_CASE(ADD_VL)
6886   NODE_NAME_CASE(AND_VL)
6887   NODE_NAME_CASE(MUL_VL)
6888   NODE_NAME_CASE(OR_VL)
6889   NODE_NAME_CASE(SDIV_VL)
6890   NODE_NAME_CASE(SHL_VL)
6891   NODE_NAME_CASE(SREM_VL)
6892   NODE_NAME_CASE(SRA_VL)
6893   NODE_NAME_CASE(SRL_VL)
6894   NODE_NAME_CASE(SUB_VL)
6895   NODE_NAME_CASE(UDIV_VL)
6896   NODE_NAME_CASE(UREM_VL)
6897   NODE_NAME_CASE(XOR_VL)
6898   NODE_NAME_CASE(FADD_VL)
6899   NODE_NAME_CASE(FSUB_VL)
6900   NODE_NAME_CASE(FMUL_VL)
6901   NODE_NAME_CASE(FDIV_VL)
6902   NODE_NAME_CASE(FNEG_VL)
6903   NODE_NAME_CASE(FABS_VL)
6904   NODE_NAME_CASE(FSQRT_VL)
6905   NODE_NAME_CASE(FMA_VL)
6906   NODE_NAME_CASE(FCOPYSIGN_VL)
6907   NODE_NAME_CASE(SMIN_VL)
6908   NODE_NAME_CASE(SMAX_VL)
6909   NODE_NAME_CASE(UMIN_VL)
6910   NODE_NAME_CASE(UMAX_VL)
6911   NODE_NAME_CASE(MULHS_VL)
6912   NODE_NAME_CASE(MULHU_VL)
6913   NODE_NAME_CASE(FP_TO_SINT_VL)
6914   NODE_NAME_CASE(FP_TO_UINT_VL)
6915   NODE_NAME_CASE(SINT_TO_FP_VL)
6916   NODE_NAME_CASE(UINT_TO_FP_VL)
6917   NODE_NAME_CASE(FP_EXTEND_VL)
6918   NODE_NAME_CASE(FP_ROUND_VL)
6919   NODE_NAME_CASE(SETCC_VL)
6920   NODE_NAME_CASE(VSELECT_VL)
6921   NODE_NAME_CASE(VMAND_VL)
6922   NODE_NAME_CASE(VMOR_VL)
6923   NODE_NAME_CASE(VMXOR_VL)
6924   NODE_NAME_CASE(VMCLR_VL)
6925   NODE_NAME_CASE(VMSET_VL)
6926   NODE_NAME_CASE(VRGATHER_VX_VL)
6927   NODE_NAME_CASE(VRGATHER_VV_VL)
6928   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
6929   NODE_NAME_CASE(VSEXT_VL)
6930   NODE_NAME_CASE(VZEXT_VL)
6931   NODE_NAME_CASE(VLE_VL)
6932   NODE_NAME_CASE(VSE_VL)
6933   }
6934   // clang-format on
6935   return nullptr;
6936 #undef NODE_NAME_CASE
6937 }
6938 
6939 /// getConstraintType - Given a constraint letter, return the type of
6940 /// constraint it is for this target.
6941 RISCVTargetLowering::ConstraintType
6942 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
6943   if (Constraint.size() == 1) {
6944     switch (Constraint[0]) {
6945     default:
6946       break;
6947     case 'f':
6948     case 'v':
6949       return C_RegisterClass;
6950     case 'I':
6951     case 'J':
6952     case 'K':
6953       return C_Immediate;
6954     case 'A':
6955       return C_Memory;
6956     }
6957   }
6958   return TargetLowering::getConstraintType(Constraint);
6959 }
6960 
6961 std::pair<unsigned, const TargetRegisterClass *>
6962 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
6963                                                   StringRef Constraint,
6964                                                   MVT VT) const {
6965   // First, see if this is a constraint that directly corresponds to a
6966   // RISCV register class.
6967   if (Constraint.size() == 1) {
6968     switch (Constraint[0]) {
6969     case 'r':
6970       return std::make_pair(0U, &RISCV::GPRRegClass);
6971     case 'f':
6972       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
6973         return std::make_pair(0U, &RISCV::FPR16RegClass);
6974       if (Subtarget.hasStdExtF() && VT == MVT::f32)
6975         return std::make_pair(0U, &RISCV::FPR32RegClass);
6976       if (Subtarget.hasStdExtD() && VT == MVT::f64)
6977         return std::make_pair(0U, &RISCV::FPR64RegClass);
6978       break;
6979     case 'v':
6980       for (const auto *RC :
6981            {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
6982             &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
6983         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
6984           return std::make_pair(0U, RC);
6985       }
6986       break;
6987     default:
6988       break;
6989     }
6990   }
6991 
6992   // Clang will correctly decode the usage of register name aliases into their
6993   // official names. However, other frontends like `rustc` do not. This allows
6994   // users of these frontends to use the ABI names for registers in LLVM-style
6995   // register constraints.
6996   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
6997                                .Case("{zero}", RISCV::X0)
6998                                .Case("{ra}", RISCV::X1)
6999                                .Case("{sp}", RISCV::X2)
7000                                .Case("{gp}", RISCV::X3)
7001                                .Case("{tp}", RISCV::X4)
7002                                .Case("{t0}", RISCV::X5)
7003                                .Case("{t1}", RISCV::X6)
7004                                .Case("{t2}", RISCV::X7)
7005                                .Cases("{s0}", "{fp}", RISCV::X8)
7006                                .Case("{s1}", RISCV::X9)
7007                                .Case("{a0}", RISCV::X10)
7008                                .Case("{a1}", RISCV::X11)
7009                                .Case("{a2}", RISCV::X12)
7010                                .Case("{a3}", RISCV::X13)
7011                                .Case("{a4}", RISCV::X14)
7012                                .Case("{a5}", RISCV::X15)
7013                                .Case("{a6}", RISCV::X16)
7014                                .Case("{a7}", RISCV::X17)
7015                                .Case("{s2}", RISCV::X18)
7016                                .Case("{s3}", RISCV::X19)
7017                                .Case("{s4}", RISCV::X20)
7018                                .Case("{s5}", RISCV::X21)
7019                                .Case("{s6}", RISCV::X22)
7020                                .Case("{s7}", RISCV::X23)
7021                                .Case("{s8}", RISCV::X24)
7022                                .Case("{s9}", RISCV::X25)
7023                                .Case("{s10}", RISCV::X26)
7024                                .Case("{s11}", RISCV::X27)
7025                                .Case("{t3}", RISCV::X28)
7026                                .Case("{t4}", RISCV::X29)
7027                                .Case("{t5}", RISCV::X30)
7028                                .Case("{t6}", RISCV::X31)
7029                                .Default(RISCV::NoRegister);
7030   if (XRegFromAlias != RISCV::NoRegister)
7031     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
7032 
7033   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
7034   // TableGen record rather than the AsmName to choose registers for InlineAsm
7035   // constraints, plus we want to match those names to the widest floating point
7036   // register type available, manually select floating point registers here.
7037   //
7038   // The second case is the ABI name of the register, so that frontends can also
7039   // use the ABI names in register constraint lists.
7040   if (Subtarget.hasStdExtF()) {
7041     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
7042                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
7043                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
7044                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
7045                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
7046                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
7047                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
7048                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
7049                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
7050                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
7051                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
7052                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
7053                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
7054                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
7055                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
7056                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
7057                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
7058                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
7059                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
7060                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
7061                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
7062                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
7063                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
7064                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
7065                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
7066                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
7067                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
7068                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
7069                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
7070                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
7071                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
7072                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
7073                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
7074                         .Default(RISCV::NoRegister);
7075     if (FReg != RISCV::NoRegister) {
7076       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
7077       if (Subtarget.hasStdExtD()) {
7078         unsigned RegNo = FReg - RISCV::F0_F;
7079         unsigned DReg = RISCV::F0_D + RegNo;
7080         return std::make_pair(DReg, &RISCV::FPR64RegClass);
7081       }
7082       return std::make_pair(FReg, &RISCV::FPR32RegClass);
7083     }
7084   }
7085 
7086   if (Subtarget.hasStdExtV()) {
7087     Register VReg = StringSwitch<Register>(Constraint.lower())
7088                         .Case("{v0}", RISCV::V0)
7089                         .Case("{v1}", RISCV::V1)
7090                         .Case("{v2}", RISCV::V2)
7091                         .Case("{v3}", RISCV::V3)
7092                         .Case("{v4}", RISCV::V4)
7093                         .Case("{v5}", RISCV::V5)
7094                         .Case("{v6}", RISCV::V6)
7095                         .Case("{v7}", RISCV::V7)
7096                         .Case("{v8}", RISCV::V8)
7097                         .Case("{v9}", RISCV::V9)
7098                         .Case("{v10}", RISCV::V10)
7099                         .Case("{v11}", RISCV::V11)
7100                         .Case("{v12}", RISCV::V12)
7101                         .Case("{v13}", RISCV::V13)
7102                         .Case("{v14}", RISCV::V14)
7103                         .Case("{v15}", RISCV::V15)
7104                         .Case("{v16}", RISCV::V16)
7105                         .Case("{v17}", RISCV::V17)
7106                         .Case("{v18}", RISCV::V18)
7107                         .Case("{v19}", RISCV::V19)
7108                         .Case("{v20}", RISCV::V20)
7109                         .Case("{v21}", RISCV::V21)
7110                         .Case("{v22}", RISCV::V22)
7111                         .Case("{v23}", RISCV::V23)
7112                         .Case("{v24}", RISCV::V24)
7113                         .Case("{v25}", RISCV::V25)
7114                         .Case("{v26}", RISCV::V26)
7115                         .Case("{v27}", RISCV::V27)
7116                         .Case("{v28}", RISCV::V28)
7117                         .Case("{v29}", RISCV::V29)
7118                         .Case("{v30}", RISCV::V30)
7119                         .Case("{v31}", RISCV::V31)
7120                         .Default(RISCV::NoRegister);
7121     if (VReg != RISCV::NoRegister) {
7122       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
7123         return std::make_pair(VReg, &RISCV::VMRegClass);
7124       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
7125         return std::make_pair(VReg, &RISCV::VRRegClass);
7126       for (const auto *RC :
7127            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
7128         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
7129           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
7130           return std::make_pair(VReg, RC);
7131         }
7132       }
7133     }
7134   }
7135 
7136   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
7137 }
7138 
7139 unsigned
7140 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
7141   // Currently only support length 1 constraints.
7142   if (ConstraintCode.size() == 1) {
7143     switch (ConstraintCode[0]) {
7144     case 'A':
7145       return InlineAsm::Constraint_A;
7146     default:
7147       break;
7148     }
7149   }
7150 
7151   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
7152 }
7153 
7154 void RISCVTargetLowering::LowerAsmOperandForConstraint(
7155     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
7156     SelectionDAG &DAG) const {
7157   // Currently only support length 1 constraints.
7158   if (Constraint.length() == 1) {
7159     switch (Constraint[0]) {
7160     case 'I':
7161       // Validate & create a 12-bit signed immediate operand.
7162       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7163         uint64_t CVal = C->getSExtValue();
7164         if (isInt<12>(CVal))
7165           Ops.push_back(
7166               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
7167       }
7168       return;
7169     case 'J':
7170       // Validate & create an integer zero operand.
7171       if (auto *C = dyn_cast<ConstantSDNode>(Op))
7172         if (C->getZExtValue() == 0)
7173           Ops.push_back(
7174               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
7175       return;
7176     case 'K':
7177       // Validate & create a 5-bit unsigned immediate operand.
7178       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7179         uint64_t CVal = C->getZExtValue();
7180         if (isUInt<5>(CVal))
7181           Ops.push_back(
7182               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
7183       }
7184       return;
7185     default:
7186       break;
7187     }
7188   }
7189   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
7190 }
7191 
7192 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
7193                                                    Instruction *Inst,
7194                                                    AtomicOrdering Ord) const {
7195   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
7196     return Builder.CreateFence(Ord);
7197   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
7198     return Builder.CreateFence(AtomicOrdering::Release);
7199   return nullptr;
7200 }
7201 
7202 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
7203                                                     Instruction *Inst,
7204                                                     AtomicOrdering Ord) const {
7205   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
7206     return Builder.CreateFence(AtomicOrdering::Acquire);
7207   return nullptr;
7208 }
7209 
7210 TargetLowering::AtomicExpansionKind
7211 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
7212   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
7213   // point operations can't be used in an lr/sc sequence without breaking the
7214   // forward-progress guarantee.
7215   if (AI->isFloatingPointOperation())
7216     return AtomicExpansionKind::CmpXChg;
7217 
7218   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
7219   if (Size == 8 || Size == 16)
7220     return AtomicExpansionKind::MaskedIntrinsic;
7221   return AtomicExpansionKind::None;
7222 }
7223 
7224 static Intrinsic::ID
7225 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
7226   if (XLen == 32) {
7227     switch (BinOp) {
7228     default:
7229       llvm_unreachable("Unexpected AtomicRMW BinOp");
7230     case AtomicRMWInst::Xchg:
7231       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
7232     case AtomicRMWInst::Add:
7233       return Intrinsic::riscv_masked_atomicrmw_add_i32;
7234     case AtomicRMWInst::Sub:
7235       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
7236     case AtomicRMWInst::Nand:
7237       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
7238     case AtomicRMWInst::Max:
7239       return Intrinsic::riscv_masked_atomicrmw_max_i32;
7240     case AtomicRMWInst::Min:
7241       return Intrinsic::riscv_masked_atomicrmw_min_i32;
7242     case AtomicRMWInst::UMax:
7243       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
7244     case AtomicRMWInst::UMin:
7245       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
7246     }
7247   }
7248 
7249   if (XLen == 64) {
7250     switch (BinOp) {
7251     default:
7252       llvm_unreachable("Unexpected AtomicRMW BinOp");
7253     case AtomicRMWInst::Xchg:
7254       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
7255     case AtomicRMWInst::Add:
7256       return Intrinsic::riscv_masked_atomicrmw_add_i64;
7257     case AtomicRMWInst::Sub:
7258       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
7259     case AtomicRMWInst::Nand:
7260       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
7261     case AtomicRMWInst::Max:
7262       return Intrinsic::riscv_masked_atomicrmw_max_i64;
7263     case AtomicRMWInst::Min:
7264       return Intrinsic::riscv_masked_atomicrmw_min_i64;
7265     case AtomicRMWInst::UMax:
7266       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
7267     case AtomicRMWInst::UMin:
7268       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
7269     }
7270   }
7271 
7272   llvm_unreachable("Unexpected XLen\n");
7273 }
7274 
7275 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
7276     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
7277     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
7278   unsigned XLen = Subtarget.getXLen();
7279   Value *Ordering =
7280       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
7281   Type *Tys[] = {AlignedAddr->getType()};
7282   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
7283       AI->getModule(),
7284       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
7285 
7286   if (XLen == 64) {
7287     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
7288     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7289     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
7290   }
7291 
7292   Value *Result;
7293 
7294   // Must pass the shift amount needed to sign extend the loaded value prior
7295   // to performing a signed comparison for min/max. ShiftAmt is the number of
7296   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
7297   // is the number of bits to left+right shift the value in order to
7298   // sign-extend.
7299   if (AI->getOperation() == AtomicRMWInst::Min ||
7300       AI->getOperation() == AtomicRMWInst::Max) {
7301     const DataLayout &DL = AI->getModule()->getDataLayout();
7302     unsigned ValWidth =
7303         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
7304     Value *SextShamt =
7305         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
7306     Result = Builder.CreateCall(LrwOpScwLoop,
7307                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
7308   } else {
7309     Result =
7310         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
7311   }
7312 
7313   if (XLen == 64)
7314     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
7315   return Result;
7316 }
7317 
7318 TargetLowering::AtomicExpansionKind
7319 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
7320     AtomicCmpXchgInst *CI) const {
7321   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
7322   if (Size == 8 || Size == 16)
7323     return AtomicExpansionKind::MaskedIntrinsic;
7324   return AtomicExpansionKind::None;
7325 }
7326 
7327 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
7328     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
7329     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
7330   unsigned XLen = Subtarget.getXLen();
7331   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
7332   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
7333   if (XLen == 64) {
7334     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
7335     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
7336     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7337     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
7338   }
7339   Type *Tys[] = {AlignedAddr->getType()};
7340   Function *MaskedCmpXchg =
7341       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
7342   Value *Result = Builder.CreateCall(
7343       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
7344   if (XLen == 64)
7345     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
7346   return Result;
7347 }
7348 
7349 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
7350   return false;
7351 }
7352 
7353 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
7354                                                      EVT VT) const {
7355   VT = VT.getScalarType();
7356 
7357   if (!VT.isSimple())
7358     return false;
7359 
7360   switch (VT.getSimpleVT().SimpleTy) {
7361   case MVT::f16:
7362     return Subtarget.hasStdExtZfh();
7363   case MVT::f32:
7364     return Subtarget.hasStdExtF();
7365   case MVT::f64:
7366     return Subtarget.hasStdExtD();
7367   default:
7368     break;
7369   }
7370 
7371   return false;
7372 }
7373 
7374 Register RISCVTargetLowering::getExceptionPointerRegister(
7375     const Constant *PersonalityFn) const {
7376   return RISCV::X10;
7377 }
7378 
7379 Register RISCVTargetLowering::getExceptionSelectorRegister(
7380     const Constant *PersonalityFn) const {
7381   return RISCV::X11;
7382 }
7383 
7384 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
7385   // Return false to suppress the unnecessary extensions if the LibCall
7386   // arguments or return value is f32 type for LP64 ABI.
7387   RISCVABI::ABI ABI = Subtarget.getTargetABI();
7388   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
7389     return false;
7390 
7391   return true;
7392 }
7393 
7394 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
7395   if (Subtarget.is64Bit() && Type == MVT::i32)
7396     return true;
7397 
7398   return IsSigned;
7399 }
7400 
7401 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
7402                                                  SDValue C) const {
7403   // Check integral scalar types.
7404   if (VT.isScalarInteger()) {
7405     // Omit the optimization if the sub target has the M extension and the data
7406     // size exceeds XLen.
7407     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
7408       return false;
7409     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
7410       // Break the MUL to a SLLI and an ADD/SUB.
7411       const APInt &Imm = ConstNode->getAPIntValue();
7412       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
7413           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
7414         return true;
7415       // Omit the following optimization if the sub target has the M extension
7416       // and the data size >= XLen.
7417       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
7418         return false;
7419       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
7420       // a pair of LUI/ADDI.
7421       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
7422         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
7423         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
7424             (1 - ImmS).isPowerOf2())
7425         return true;
7426       }
7427     }
7428   }
7429 
7430   return false;
7431 }
7432 
7433 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
7434   if (!Subtarget.useRVVForFixedLengthVectors())
7435     return false;
7436 
7437   if (!VT.isFixedLengthVector())
7438     return false;
7439 
7440   // Don't use RVV for vectors we cannot scalarize if required.
7441   switch (VT.getVectorElementType().SimpleTy) {
7442   // i1 is supported but has different rules.
7443   default:
7444     return false;
7445   case MVT::i1:
7446     // Masks can only use a single register.
7447     if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
7448       return false;
7449     break;
7450   case MVT::i8:
7451   case MVT::i16:
7452   case MVT::i32:
7453   case MVT::i64:
7454     break;
7455   case MVT::f16:
7456     if (!Subtarget.hasStdExtZfh())
7457       return false;
7458     break;
7459   case MVT::f32:
7460     if (!Subtarget.hasStdExtF())
7461       return false;
7462     break;
7463   case MVT::f64:
7464     if (!Subtarget.hasStdExtD())
7465       return false;
7466     break;
7467   }
7468 
7469   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
7470   // Don't use RVV for types that don't fit.
7471   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
7472     return false;
7473 
7474   // TODO: Perhaps an artificial restriction, but worth having whilst getting
7475   // the base fixed length RVV support in place.
7476   if (!VT.isPow2VectorType())
7477     return false;
7478 
7479   return true;
7480 }
7481 
7482 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
7483     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7484     bool *Fast) const {
7485   if (!VT.isScalableVector())
7486     return false;
7487 
7488   EVT ElemVT = VT.getVectorElementType();
7489   if (Alignment >= ElemVT.getStoreSize()) {
7490     if (Fast)
7491       *Fast = true;
7492     return true;
7493   }
7494 
7495   return false;
7496 }
7497 
7498 bool RISCVTargetLowering::splitValueIntoRegisterParts(
7499     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
7500     unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
7501   bool IsABIRegCopy = CC.hasValue();
7502   EVT ValueVT = Val.getValueType();
7503   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
7504     // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
7505     // and cast to f32.
7506     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
7507     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
7508     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
7509                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
7510     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
7511     Parts[0] = Val;
7512     return true;
7513   }
7514 
7515   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
7516     LLVMContext &Context = *DAG.getContext();
7517     EVT ValueEltVT = ValueVT.getVectorElementType();
7518     EVT PartEltVT = PartVT.getVectorElementType();
7519     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
7520     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
7521     if (PartVTBitSize % ValueVTBitSize == 0) {
7522       // If the element types are different, bitcast to the same element type of
7523       // PartVT first.
7524       if (ValueEltVT != PartEltVT) {
7525         unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
7526         assert(Count != 0 && "The number of element should not be zero.");
7527         EVT SameEltTypeVT =
7528             EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
7529         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
7530       }
7531       Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
7532                         Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
7533       Parts[0] = Val;
7534       return true;
7535     }
7536   }
7537   return false;
7538 }
7539 
7540 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
7541     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
7542     MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
7543   bool IsABIRegCopy = CC.hasValue();
7544   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
7545     SDValue Val = Parts[0];
7546 
7547     // Cast the f32 to i32, truncate to i16, and cast back to f16.
7548     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
7549     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
7550     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
7551     return Val;
7552   }
7553 
7554   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
7555     LLVMContext &Context = *DAG.getContext();
7556     SDValue Val = Parts[0];
7557     EVT ValueEltVT = ValueVT.getVectorElementType();
7558     EVT PartEltVT = PartVT.getVectorElementType();
7559     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
7560     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
7561     if (PartVTBitSize % ValueVTBitSize == 0) {
7562       EVT SameEltTypeVT = ValueVT;
7563       // If the element types are different, convert it to the same element type
7564       // of PartVT.
7565       if (ValueEltVT != PartEltVT) {
7566         unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
7567         assert(Count != 0 && "The number of element should not be zero.");
7568         SameEltTypeVT =
7569             EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
7570       }
7571       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
7572                         DAG.getConstant(0, DL, Subtarget.getXLenVT()));
7573       if (ValueEltVT != PartEltVT)
7574         Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
7575       return Val;
7576     }
7577   }
7578   return SDValue();
7579 }
7580 
7581 #define GET_REGISTER_MATCHER
7582 #include "RISCVGenAsmMatcher.inc"
7583 
7584 Register
7585 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
7586                                        const MachineFunction &MF) const {
7587   Register Reg = MatchRegisterAltName(RegName);
7588   if (Reg == RISCV::NoRegister)
7589     Reg = MatchRegisterName(RegName);
7590   if (Reg == RISCV::NoRegister)
7591     report_fatal_error(
7592         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
7593   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
7594   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
7595     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
7596                              StringRef(RegName) + "\"."));
7597   return Reg;
7598 }
7599 
namespace llvm {
namespace RISCVVIntrinsicsTable {

// Define GET_RISCVVIntrinsicsTable_IMPL before including
// RISCVGenSearchableTables.inc so that whatever the include emits under that
// guard (presumably the table's implementation) lands inside
// llvm::RISCVVIntrinsicsTable.
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm
7609