1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/DiagnosticPrinter.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/KnownBits.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/raw_ostream.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49                                          const RISCVSubtarget &STI)
50     : TargetLowering(TM), Subtarget(STI) {
51 
52   if (Subtarget.isRV32E())
53     report_fatal_error("Codegen not yet implemented for RV32E");
54 
55   RISCVABI::ABI ABI = Subtarget.getTargetABI();
56   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59       !Subtarget.hasStdExtF()) {
60     errs() << "Hard-float 'f' ABI can't be used for a target that "
61               "doesn't support the F instruction set extension (ignoring "
62               "target-abi)\n";
63     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
64   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65              !Subtarget.hasStdExtD()) {
66     errs() << "Hard-float 'd' ABI can't be used for a target that "
67               "doesn't support the D instruction set extension (ignoring "
68               "target-abi)\n";
69     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
70   }
71 
72   switch (ABI) {
73   default:
74     report_fatal_error("Don't know how to lower this ABI");
75   case RISCVABI::ABI_ILP32:
76   case RISCVABI::ABI_ILP32F:
77   case RISCVABI::ABI_ILP32D:
78   case RISCVABI::ABI_LP64:
79   case RISCVABI::ABI_LP64F:
80   case RISCVABI::ABI_LP64D:
81     break;
82   }
83 
84   MVT XLenVT = Subtarget.getXLenVT();
85 
86   // Set up the register classes.
87   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89   if (Subtarget.hasStdExtZfh())
90     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91   if (Subtarget.hasStdExtF())
92     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93   if (Subtarget.hasStdExtD())
94     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96   static const MVT::SimpleValueType BoolVecVTs[] = {
97       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
98       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99   static const MVT::SimpleValueType IntVecVTs[] = {
100       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
101       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
102       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104       MVT::nxv4i64, MVT::nxv8i64};
105   static const MVT::SimpleValueType F16VecVTs[] = {
106       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
107       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108   static const MVT::SimpleValueType F32VecVTs[] = {
109       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110   static const MVT::SimpleValueType F64VecVTs[] = {
111       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113   if (Subtarget.hasVInstructions()) {
114     auto addRegClassForRVV = [this](MVT VT) {
115       unsigned Size = VT.getSizeInBits().getKnownMinValue();
116       assert(Size <= 512 && isPowerOf2_32(Size));
117       const TargetRegisterClass *RC;
118       if (Size <= 64)
119         RC = &RISCV::VRRegClass;
120       else if (Size == 128)
121         RC = &RISCV::VRM2RegClass;
122       else if (Size == 256)
123         RC = &RISCV::VRM4RegClass;
124       else
125         RC = &RISCV::VRM8RegClass;
126 
127       addRegisterClass(VT, RC);
128     };
129 
130     for (MVT VT : BoolVecVTs)
131       addRegClassForRVV(VT);
132     for (MVT VT : IntVecVTs) {
133       if (VT.getVectorElementType() == MVT::i64 &&
134           !Subtarget.hasVInstructionsI64())
135         continue;
136       addRegClassForRVV(VT);
137     }
138 
139     if (Subtarget.hasVInstructionsF16())
140       for (MVT VT : F16VecVTs)
141         addRegClassForRVV(VT);
142 
143     if (Subtarget.hasVInstructionsF32())
144       for (MVT VT : F32VecVTs)
145         addRegClassForRVV(VT);
146 
147     if (Subtarget.hasVInstructionsF64())
148       for (MVT VT : F64VecVTs)
149         addRegClassForRVV(VT);
150 
151     if (Subtarget.useRVVForFixedLengthVectors()) {
152       auto addRegClassForFixedVectors = [this](MVT VT) {
153         MVT ContainerVT = getContainerForFixedLengthVector(VT);
154         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156         addRegisterClass(VT, TRI.getRegClass(RCID));
157       };
158       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159         if (useRVVForFixedLengthVectorVT(VT))
160           addRegClassForFixedVectors(VT);
161 
162       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163         if (useRVVForFixedLengthVectorVT(VT))
164           addRegClassForFixedVectors(VT);
165     }
166   }
167 
168   // Compute derived properties from the register classes.
169   computeRegisterProperties(STI.getRegisterInfo());
170 
171   setStackPointerRegisterToSaveRestore(RISCV::X2);
172 
173   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
174     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
175 
176   // TODO: add all necessary setOperationAction calls.
177   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
178 
179   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
180   setOperationAction(ISD::BR_CC, XLenVT, Expand);
181   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
182   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
183 
184   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
185   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
186 
187   setOperationAction(ISD::VASTART, MVT::Other, Custom);
188   setOperationAction(ISD::VAARG, MVT::Other, Expand);
189   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
190   setOperationAction(ISD::VAEND, MVT::Other, Expand);
191 
192   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
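  // Zbb provides sext.b/sext.h, so i8/i16 SIGN_EXTEND_INREG only needs to be
  // expanded when that extension is unavailable.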
193   if (!Subtarget.hasStdExtZbb()) {
194     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
195     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
196   }
197 
198   if (Subtarget.is64Bit()) {
199     setOperationAction(ISD::ADD, MVT::i32, Custom);
200     setOperationAction(ISD::SUB, MVT::i32, Custom);
201     setOperationAction(ISD::SHL, MVT::i32, Custom);
202     setOperationAction(ISD::SRA, MVT::i32, Custom);
203     setOperationAction(ISD::SRL, MVT::i32, Custom);
204 
205     setOperationAction(ISD::UADDO, MVT::i32, Custom);
206     setOperationAction(ISD::USUBO, MVT::i32, Custom);
207     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
208     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
209   } else {
210     setLibcallName(RTLIB::SHL_I128, nullptr);
211     setLibcallName(RTLIB::SRL_I128, nullptr);
212     setLibcallName(RTLIB::SRA_I128, nullptr);
213     setLibcallName(RTLIB::MUL_I128, nullptr);
214     setLibcallName(RTLIB::MULO_I64, nullptr);
215   }
216 
217   if (!Subtarget.hasStdExtM()) {
218     setOperationAction(ISD::MUL, XLenVT, Expand);
219     setOperationAction(ISD::MULHS, XLenVT, Expand);
220     setOperationAction(ISD::MULHU, XLenVT, Expand);
221     setOperationAction(ISD::SDIV, XLenVT, Expand);
222     setOperationAction(ISD::UDIV, XLenVT, Expand);
223     setOperationAction(ISD::SREM, XLenVT, Expand);
224     setOperationAction(ISD::UREM, XLenVT, Expand);
225   } else {
226     if (Subtarget.is64Bit()) {
227       setOperationAction(ISD::MUL, MVT::i32, Custom);
228       setOperationAction(ISD::MUL, MVT::i128, Custom);
229 
230       setOperationAction(ISD::SDIV, MVT::i8, Custom);
231       setOperationAction(ISD::UDIV, MVT::i8, Custom);
232       setOperationAction(ISD::UREM, MVT::i8, Custom);
233       setOperationAction(ISD::SDIV, MVT::i16, Custom);
234       setOperationAction(ISD::UDIV, MVT::i16, Custom);
235       setOperationAction(ISD::UREM, MVT::i16, Custom);
236       setOperationAction(ISD::SDIV, MVT::i32, Custom);
237       setOperationAction(ISD::UDIV, MVT::i32, Custom);
238       setOperationAction(ISD::UREM, MVT::i32, Custom);
239     } else {
240       setOperationAction(ISD::MUL, MVT::i64, Custom);
241     }
242   }
243 
244   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
245   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
246   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
247   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
248 
249   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
250   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
251   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
252 
253   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
254       Subtarget.hasStdExtZbkb()) {
255     if (Subtarget.is64Bit()) {
256       setOperationAction(ISD::ROTL, MVT::i32, Custom);
257       setOperationAction(ISD::ROTR, MVT::i32, Custom);
258     }
259   } else {
260     setOperationAction(ISD::ROTL, XLenVT, Expand);
261     setOperationAction(ISD::ROTR, XLenVT, Expand);
262   }
263 
264   if (Subtarget.hasStdExtZbp()) {
265     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
266     // more combining.
267     setOperationAction(ISD::BITREVERSE, XLenVT,   Custom);
268     setOperationAction(ISD::BSWAP,      XLenVT,   Custom);
269     setOperationAction(ISD::BITREVERSE, MVT::i8,  Custom);
270     // BSWAP i8 doesn't exist.
271     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
272     setOperationAction(ISD::BSWAP,      MVT::i16, Custom);
273 
274     if (Subtarget.is64Bit()) {
275       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
276       setOperationAction(ISD::BSWAP,      MVT::i32, Custom);
277     }
278   } else {
279     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
280     // pattern match it directly in isel.
281     setOperationAction(ISD::BSWAP, XLenVT,
282                        (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
283                            ? Legal
284                            : Expand);
285     // Zbkb can use rev8+brev8 to implement bitreverse.
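    // (rev8 reverses the byte order and brev8 reverses the bits within each
    // byte, so composing the two reverses every bit of the value.)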
286     setOperationAction(ISD::BITREVERSE, XLenVT,
287                        Subtarget.hasStdExtZbkb() ? Custom : Expand);
288   }
289 
290   if (Subtarget.hasStdExtZbb()) {
291     setOperationAction(ISD::SMIN, XLenVT, Legal);
292     setOperationAction(ISD::SMAX, XLenVT, Legal);
293     setOperationAction(ISD::UMIN, XLenVT, Legal);
294     setOperationAction(ISD::UMAX, XLenVT, Legal);
295 
296     if (Subtarget.is64Bit()) {
297       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
298       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
299       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
300       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
301     }
302   } else {
303     setOperationAction(ISD::CTTZ, XLenVT, Expand);
304     setOperationAction(ISD::CTLZ, XLenVT, Expand);
305     setOperationAction(ISD::CTPOP, XLenVT, Expand);
306   }
307 
308   if (Subtarget.hasStdExtZbt()) {
309     setOperationAction(ISD::FSHL, XLenVT, Custom);
310     setOperationAction(ISD::FSHR, XLenVT, Custom);
311     setOperationAction(ISD::SELECT, XLenVT, Legal);
312 
313     if (Subtarget.is64Bit()) {
314       setOperationAction(ISD::FSHL, MVT::i32, Custom);
315       setOperationAction(ISD::FSHR, MVT::i32, Custom);
316     }
317   } else {
318     setOperationAction(ISD::SELECT, XLenVT, Custom);
319   }
320 
321   static const ISD::CondCode FPCCToExpand[] = {
322       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
323       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
324       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
325 
326   static const ISD::NodeType FPOpToExpand[] = {
327       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
328       ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
329 
330   if (Subtarget.hasStdExtZfh())
331     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
332 
333   if (Subtarget.hasStdExtZfh()) {
334     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
335     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
336     setOperationAction(ISD::LRINT, MVT::f16, Legal);
337     setOperationAction(ISD::LLRINT, MVT::f16, Legal);
338     setOperationAction(ISD::LROUND, MVT::f16, Legal);
339     setOperationAction(ISD::LLROUND, MVT::f16, Legal);
340     setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
341     setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
342     setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
343     setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
344     setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
345     setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
346     setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
347     setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
348     setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
349     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
350     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
351     setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
352     setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
353     setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
354     for (auto CC : FPCCToExpand)
355       setCondCodeAction(CC, MVT::f16, Expand);
356     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
357     setOperationAction(ISD::SELECT, MVT::f16, Custom);
358     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
359 
360     setOperationAction(ISD::FREM,       MVT::f16, Promote);
361     setOperationAction(ISD::FCEIL,      MVT::f16, Promote);
362     setOperationAction(ISD::FFLOOR,     MVT::f16, Promote);
363     setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
364     setOperationAction(ISD::FRINT,      MVT::f16, Promote);
365     setOperationAction(ISD::FROUND,     MVT::f16, Promote);
366     setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
367     setOperationAction(ISD::FTRUNC,     MVT::f16, Promote);
368     setOperationAction(ISD::FPOW,       MVT::f16, Promote);
369     setOperationAction(ISD::FPOWI,      MVT::f16, Promote);
370     setOperationAction(ISD::FCOS,       MVT::f16, Promote);
371     setOperationAction(ISD::FSIN,       MVT::f16, Promote);
372     setOperationAction(ISD::FSINCOS,    MVT::f16, Promote);
373     setOperationAction(ISD::FEXP,       MVT::f16, Promote);
374     setOperationAction(ISD::FEXP2,      MVT::f16, Promote);
375     setOperationAction(ISD::FLOG,       MVT::f16, Promote);
376     setOperationAction(ISD::FLOG2,      MVT::f16, Promote);
377     setOperationAction(ISD::FLOG10,     MVT::f16, Promote);
378 
379     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
380     // complete support for all operations in LegalizeDAG.
381 
382     // We need to custom promote this.
383     if (Subtarget.is64Bit())
384       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
385   }
386 
387   if (Subtarget.hasStdExtF()) {
388     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
389     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
390     setOperationAction(ISD::LRINT, MVT::f32, Legal);
391     setOperationAction(ISD::LLRINT, MVT::f32, Legal);
392     setOperationAction(ISD::LROUND, MVT::f32, Legal);
393     setOperationAction(ISD::LLROUND, MVT::f32, Legal);
394     setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
395     setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
396     setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
397     setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
398     setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
399     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
400     setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
401     setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
402     setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
403     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
404     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
405     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
406     for (auto CC : FPCCToExpand)
407       setCondCodeAction(CC, MVT::f32, Expand);
408     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
409     setOperationAction(ISD::SELECT, MVT::f32, Custom);
410     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
411     for (auto Op : FPOpToExpand)
412       setOperationAction(Op, MVT::f32, Expand);
413     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
414     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
415   }
416 
417   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
418     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
419 
420   if (Subtarget.hasStdExtD()) {
421     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
422     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
423     setOperationAction(ISD::LRINT, MVT::f64, Legal);
424     setOperationAction(ISD::LLRINT, MVT::f64, Legal);
425     setOperationAction(ISD::LROUND, MVT::f64, Legal);
426     setOperationAction(ISD::LLROUND, MVT::f64, Legal);
427     setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
428     setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
429     setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
430     setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
431     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
432     setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
433     setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
434     setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
435     setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
436     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
437     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
438     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
439     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
440     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
441     for (auto CC : FPCCToExpand)
442       setCondCodeAction(CC, MVT::f64, Expand);
443     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
444     setOperationAction(ISD::SELECT, MVT::f64, Custom);
445     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
446     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
447     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
448     for (auto Op : FPOpToExpand)
449       setOperationAction(Op, MVT::f64, Expand);
450     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
451     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
452   }
453 
454   if (Subtarget.is64Bit()) {
455     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
456     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
457     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
458     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
459   }
460 
461   if (Subtarget.hasStdExtF()) {
462     setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
463     setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
464 
465     setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
466     setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
467     setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
468     setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
469 
470     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
471     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
472   }
473 
474   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
475   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
476   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
477   setOperationAction(ISD::JumpTable, XLenVT, Custom);
478 
479   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
480 
481   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
482   // Unfortunately this can't be determined just from the ISA naming string.
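  // On RV32 the custom lowering reads the cycle/cycleh CSR pair, retrying if
  // the high half changes between the two reads.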
483   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
484                      Subtarget.is64Bit() ? Legal : Custom);
485 
486   setOperationAction(ISD::TRAP, MVT::Other, Legal);
487   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
488   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
489   if (Subtarget.is64Bit())
490     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
491 
492   if (Subtarget.hasStdExtA()) {
493     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
494     setMinCmpXchgSizeInBits(32);
495   } else {
496     setMaxAtomicSizeInBitsSupported(0);
497   }
498 
499   setBooleanContents(ZeroOrOneBooleanContent);
500 
501   if (Subtarget.hasVInstructions()) {
502     setBooleanVectorContents(ZeroOrOneBooleanContent);
503 
504     setOperationAction(ISD::VSCALE, XLenVT, Custom);
505 
506     // RVV intrinsics may have illegal operands.
507     // We also need to custom legalize vmv.x.s.
508     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
509     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
510     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
511     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
512     if (Subtarget.is64Bit()) {
513       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
514     } else {
515       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
516       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
517     }
518 
519     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
520     setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
521 
522     static const unsigned IntegerVPOps[] = {
523         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
524         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
525         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
526         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
527         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
528         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
529         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
530         ISD::VP_MERGE,       ISD::VP_SELECT};
531 
532     static const unsigned FloatingPointVPOps[] = {
533         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
534         ISD::VP_FDIV,        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
535         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
536         ISD::VP_SELECT};
537 
538     if (!Subtarget.is64Bit()) {
539       // We must custom-lower certain vXi64 operations on RV32 due to the vector
540       // element type being illegal.
541       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
542       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
543 
544       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
545       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
546       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
547       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
548       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
549       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
550       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
551       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
552 
553       setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
554       setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
555       setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
556       setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
557       setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
558       setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
559       setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
560       setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
561     }
562 
563     for (MVT VT : BoolVecVTs) {
564       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
565 
566       // Mask VTs are custom-expanded into a series of standard nodes
567       setOperationAction(ISD::TRUNCATE, VT, Custom);
568       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
569       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
570       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
571 
572       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
573       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
574 
575       setOperationAction(ISD::SELECT, VT, Custom);
576       setOperationAction(ISD::SELECT_CC, VT, Expand);
577       setOperationAction(ISD::VSELECT, VT, Expand);
578       setOperationAction(ISD::VP_MERGE, VT, Expand);
579       setOperationAction(ISD::VP_SELECT, VT, Expand);
580 
581       setOperationAction(ISD::VP_AND, VT, Custom);
582       setOperationAction(ISD::VP_OR, VT, Custom);
583       setOperationAction(ISD::VP_XOR, VT, Custom);
584 
585       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
586       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
587       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
588 
589       setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
590       setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
591       setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
592 
593       // RVV has native int->float & float->int conversions where the
594       // element type sizes are within one power-of-two of each other. Any
595       // wider distances between type sizes have to be lowered as sequences
596       // which progressively narrow the gap in stages.
597       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
598       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
599       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
600       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
601 
602       // Expand all extending loads to types larger than this, and truncating
603       // stores from types larger than this.
604       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
605         setTruncStoreAction(OtherVT, VT, Expand);
606         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
607         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
608         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
609       }
610     }
611 
612     for (MVT VT : IntVecVTs) {
613       if (VT.getVectorElementType() == MVT::i64 &&
614           !Subtarget.hasVInstructionsI64())
615         continue;
616 
617       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
618       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
619 
620       // Vectors implement MULHS/MULHU.
621       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
622       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
623 
624       // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
625       if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
626         setOperationAction(ISD::MULHU, VT, Expand);
627         setOperationAction(ISD::MULHS, VT, Expand);
628       }
629 
630       setOperationAction(ISD::SMIN, VT, Legal);
631       setOperationAction(ISD::SMAX, VT, Legal);
632       setOperationAction(ISD::UMIN, VT, Legal);
633       setOperationAction(ISD::UMAX, VT, Legal);
634 
635       setOperationAction(ISD::ROTL, VT, Expand);
636       setOperationAction(ISD::ROTR, VT, Expand);
637 
638       setOperationAction(ISD::CTTZ, VT, Expand);
639       setOperationAction(ISD::CTLZ, VT, Expand);
640       setOperationAction(ISD::CTPOP, VT, Expand);
641 
642       setOperationAction(ISD::BSWAP, VT, Expand);
643 
644       // Custom-lower extensions and truncations from/to mask types.
645       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
646       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
647       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
648 
649       // RVV has native int->float & float->int conversions where the
650       // element type sizes are within one power-of-two of each other. Any
651       // wider distances between type sizes have to be lowered as sequences
652       // which progressively narrow the gap in stages.
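      // For example, an nxv2i8 -> nxv2f64 conversion cannot be done with a
      // single vfcvt; the source is first brought within one power-of-two of
      // the destination (i8 -> i32) before the final widening convert.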
653       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
654       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
655       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
656       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
657 
658       setOperationAction(ISD::SADDSAT, VT, Legal);
659       setOperationAction(ISD::UADDSAT, VT, Legal);
660       setOperationAction(ISD::SSUBSAT, VT, Legal);
661       setOperationAction(ISD::USUBSAT, VT, Legal);
662 
663       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
664       // nodes which truncate by one power of two at a time.
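      // For example, an nxv2i32 -> nxv2i8 truncate becomes two such nodes
      // (i32 -> i16 -> i8), each selected as a narrowing shift.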
665       setOperationAction(ISD::TRUNCATE, VT, Custom);
666 
667       // Custom-lower insert/extract operations to simplify patterns.
668       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
669       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
670 
671       // Custom-lower reduction operations to set up the corresponding custom
672       // nodes' operands.
673       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
674       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
675       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
676       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
677       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
678       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
679       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
680       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
681 
682       for (unsigned VPOpc : IntegerVPOps)
683         setOperationAction(VPOpc, VT, Custom);
684 
685       setOperationAction(ISD::LOAD, VT, Custom);
686       setOperationAction(ISD::STORE, VT, Custom);
687 
688       setOperationAction(ISD::MLOAD, VT, Custom);
689       setOperationAction(ISD::MSTORE, VT, Custom);
690       setOperationAction(ISD::MGATHER, VT, Custom);
691       setOperationAction(ISD::MSCATTER, VT, Custom);
692 
693       setOperationAction(ISD::VP_LOAD, VT, Custom);
694       setOperationAction(ISD::VP_STORE, VT, Custom);
695       setOperationAction(ISD::VP_GATHER, VT, Custom);
696       setOperationAction(ISD::VP_SCATTER, VT, Custom);
697 
698       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
699       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
700       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
701 
702       setOperationAction(ISD::SELECT, VT, Custom);
703       setOperationAction(ISD::SELECT_CC, VT, Expand);
704 
705       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
706       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
707 
708       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
709         setTruncStoreAction(VT, OtherVT, Expand);
710         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
711         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
712         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
713       }
714 
715       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
716       // type that can represent the value exactly.
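      // For i8/i16 elements, for example, the value is converted to f32 and
      // the exponent field of the result yields floor(log2(x)), from which
      // the zero count is computed (CTTZ first isolates the lowest set bit).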
717       if (VT.getVectorElementType() != MVT::i64) {
718         MVT FloatEltVT =
719             VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
720         EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
721         if (isTypeLegal(FloatVT)) {
722           setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
723           setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
724         }
725       }
726     }
727 
728     // Expand various CCs to best match the RVV ISA, which natively supports UNE
729     // but no other unordered comparisons, and supports all ordered comparisons
730     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
731     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
732     // and we pattern-match those back to the "original", swapping operands once
733     // more. This way we catch both operations and both "vf" and "fv" forms with
734     // fewer patterns.
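    // For example, (setogt x, splat(f)) is expanded to (setolt splat(f), x),
    // and the pattern for the swapped form can then select vmfgt.vf x, f.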
735     static const ISD::CondCode VFPCCToExpand[] = {
736         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
737         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
738         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
739     };
740 
741     // Sets common operation actions on RVV floating-point vector types.
742     const auto SetCommonVFPActions = [&](MVT VT) {
743       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
744       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
745       // sizes are within one power-of-two of each other. Therefore conversions
746       // between vXf16 and vXf64 must be lowered as sequences which convert via
747       // vXf32.
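      // For example, an nxv2f16 -> nxv2f64 FP_EXTEND is emitted as
      // nxv2f16 -> nxv2f32 -> nxv2f64.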
748       setOperationAction(ISD::FP_ROUND, VT, Custom);
749       setOperationAction(ISD::FP_EXTEND, VT, Custom);
750       // Custom-lower insert/extract operations to simplify patterns.
751       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
752       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
753       // Expand various condition codes (explained above).
754       for (auto CC : VFPCCToExpand)
755         setCondCodeAction(CC, VT, Expand);
756 
757       setOperationAction(ISD::FMINNUM, VT, Legal);
758       setOperationAction(ISD::FMAXNUM, VT, Legal);
759 
760       setOperationAction(ISD::FTRUNC, VT, Custom);
761       setOperationAction(ISD::FCEIL, VT, Custom);
762       setOperationAction(ISD::FFLOOR, VT, Custom);
763       setOperationAction(ISD::FROUND, VT, Custom);
764 
765       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
766       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
767       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
768       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
769 
770       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
771 
772       setOperationAction(ISD::LOAD, VT, Custom);
773       setOperationAction(ISD::STORE, VT, Custom);
774 
775       setOperationAction(ISD::MLOAD, VT, Custom);
776       setOperationAction(ISD::MSTORE, VT, Custom);
777       setOperationAction(ISD::MGATHER, VT, Custom);
778       setOperationAction(ISD::MSCATTER, VT, Custom);
779 
780       setOperationAction(ISD::VP_LOAD, VT, Custom);
781       setOperationAction(ISD::VP_STORE, VT, Custom);
782       setOperationAction(ISD::VP_GATHER, VT, Custom);
783       setOperationAction(ISD::VP_SCATTER, VT, Custom);
784 
785       setOperationAction(ISD::SELECT, VT, Custom);
786       setOperationAction(ISD::SELECT_CC, VT, Expand);
787 
788       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
789       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
790       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
791 
792       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
793 
794       for (unsigned VPOpc : FloatingPointVPOps)
795         setOperationAction(VPOpc, VT, Custom);
796     };
797 
798     // Sets common extload/truncstore actions on RVV floating-point vector
799     // types.
800     const auto SetCommonVFPExtLoadTruncStoreActions =
801         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
802           for (auto SmallVT : SmallerVTs) {
803             setTruncStoreAction(VT, SmallVT, Expand);
804             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
805           }
806         };
807 
808     if (Subtarget.hasVInstructionsF16())
809       for (MVT VT : F16VecVTs)
810         SetCommonVFPActions(VT);
811 
812     for (MVT VT : F32VecVTs) {
813       if (Subtarget.hasVInstructionsF32())
814         SetCommonVFPActions(VT);
815       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
816     }
817 
818     for (MVT VT : F64VecVTs) {
819       if (Subtarget.hasVInstructionsF64())
820         SetCommonVFPActions(VT);
821       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
822       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
823     }
824 
825     if (Subtarget.useRVVForFixedLengthVectors()) {
826       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
827         if (!useRVVForFixedLengthVectorVT(VT))
828           continue;
829 
830         // By default everything must be expanded.
831         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
832           setOperationAction(Op, VT, Expand);
833         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
834           setTruncStoreAction(VT, OtherVT, Expand);
835           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
836           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
837           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
838         }
839 
840         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
841         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
842         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
843 
844         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
845         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
846 
847         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
848         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
849 
850         setOperationAction(ISD::LOAD, VT, Custom);
851         setOperationAction(ISD::STORE, VT, Custom);
852 
853         setOperationAction(ISD::SETCC, VT, Custom);
854 
855         setOperationAction(ISD::SELECT, VT, Custom);
856 
857         setOperationAction(ISD::TRUNCATE, VT, Custom);
858 
859         setOperationAction(ISD::BITCAST, VT, Custom);
860 
861         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
862         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
863         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
864 
865         setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
866         setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
867         setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
868 
869         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
870         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
871         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
872         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
873 
874         // Operations below are different between masks and other vectors.
875         if (VT.getVectorElementType() == MVT::i1) {
876           setOperationAction(ISD::VP_AND, VT, Custom);
877           setOperationAction(ISD::VP_OR, VT, Custom);
878           setOperationAction(ISD::VP_XOR, VT, Custom);
879           setOperationAction(ISD::AND, VT, Custom);
880           setOperationAction(ISD::OR, VT, Custom);
881           setOperationAction(ISD::XOR, VT, Custom);
882           continue;
883         }
884 
885         // Use SPLAT_VECTOR to prevent type legalization from destroying the
886         // splats when type legalizing i64 scalar on RV32.
887         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
888         // improvements first.
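        // SPLAT_VECTOR_PARTS carries the scalar as separate lo/hi XLen
        // operands, so the i64 splat value survives type legalization intact
        // on RV32.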
889         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
890           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
891           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
892         }
893 
894         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
895         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
896 
897         setOperationAction(ISD::MLOAD, VT, Custom);
898         setOperationAction(ISD::MSTORE, VT, Custom);
899         setOperationAction(ISD::MGATHER, VT, Custom);
900         setOperationAction(ISD::MSCATTER, VT, Custom);
901 
902         setOperationAction(ISD::VP_LOAD, VT, Custom);
903         setOperationAction(ISD::VP_STORE, VT, Custom);
904         setOperationAction(ISD::VP_GATHER, VT, Custom);
905         setOperationAction(ISD::VP_SCATTER, VT, Custom);
906 
907         setOperationAction(ISD::ADD, VT, Custom);
908         setOperationAction(ISD::MUL, VT, Custom);
909         setOperationAction(ISD::SUB, VT, Custom);
910         setOperationAction(ISD::AND, VT, Custom);
911         setOperationAction(ISD::OR, VT, Custom);
912         setOperationAction(ISD::XOR, VT, Custom);
913         setOperationAction(ISD::SDIV, VT, Custom);
914         setOperationAction(ISD::SREM, VT, Custom);
915         setOperationAction(ISD::UDIV, VT, Custom);
916         setOperationAction(ISD::UREM, VT, Custom);
917         setOperationAction(ISD::SHL, VT, Custom);
918         setOperationAction(ISD::SRA, VT, Custom);
919         setOperationAction(ISD::SRL, VT, Custom);
920 
921         setOperationAction(ISD::SMIN, VT, Custom);
922         setOperationAction(ISD::SMAX, VT, Custom);
923         setOperationAction(ISD::UMIN, VT, Custom);
924         setOperationAction(ISD::UMAX, VT, Custom);
925         setOperationAction(ISD::ABS,  VT, Custom);
926 
927         // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
928         if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
929           setOperationAction(ISD::MULHS, VT, Custom);
930           setOperationAction(ISD::MULHU, VT, Custom);
931         }
932 
933         setOperationAction(ISD::SADDSAT, VT, Custom);
934         setOperationAction(ISD::UADDSAT, VT, Custom);
935         setOperationAction(ISD::SSUBSAT, VT, Custom);
936         setOperationAction(ISD::USUBSAT, VT, Custom);
937 
938         setOperationAction(ISD::VSELECT, VT, Custom);
939         setOperationAction(ISD::SELECT_CC, VT, Expand);
940 
941         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
942         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
943         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
944 
945         // Custom-lower reduction operations to set up the corresponding custom
946         // nodes' operands.
947         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
948         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
949         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
950         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
951         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
952 
953         for (unsigned VPOpc : IntegerVPOps)
954           setOperationAction(VPOpc, VT, Custom);
955 
956         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
957         // type that can represent the value exactly.
958         if (VT.getVectorElementType() != MVT::i64) {
959           MVT FloatEltVT =
960               VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
961           EVT FloatVT =
962               MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
963           if (isTypeLegal(FloatVT)) {
964             setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
965             setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
966           }
967         }
968       }
969 
970       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
971         if (!useRVVForFixedLengthVectorVT(VT))
972           continue;
973 
974         // By default everything must be expanded.
975         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
976           setOperationAction(Op, VT, Expand);
977         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
978           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
979           setTruncStoreAction(VT, OtherVT, Expand);
980         }
981 
982         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
983         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
984         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
985 
986         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
987         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
988         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
989         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
990         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
991 
992         setOperationAction(ISD::LOAD, VT, Custom);
993         setOperationAction(ISD::STORE, VT, Custom);
994         setOperationAction(ISD::MLOAD, VT, Custom);
995         setOperationAction(ISD::MSTORE, VT, Custom);
996         setOperationAction(ISD::MGATHER, VT, Custom);
997         setOperationAction(ISD::MSCATTER, VT, Custom);
998 
999         setOperationAction(ISD::VP_LOAD, VT, Custom);
1000         setOperationAction(ISD::VP_STORE, VT, Custom);
1001         setOperationAction(ISD::VP_GATHER, VT, Custom);
1002         setOperationAction(ISD::VP_SCATTER, VT, Custom);
1003 
1004         setOperationAction(ISD::FADD, VT, Custom);
1005         setOperationAction(ISD::FSUB, VT, Custom);
1006         setOperationAction(ISD::FMUL, VT, Custom);
1007         setOperationAction(ISD::FDIV, VT, Custom);
1008         setOperationAction(ISD::FNEG, VT, Custom);
1009         setOperationAction(ISD::FABS, VT, Custom);
1010         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1011         setOperationAction(ISD::FSQRT, VT, Custom);
1012         setOperationAction(ISD::FMA, VT, Custom);
1013         setOperationAction(ISD::FMINNUM, VT, Custom);
1014         setOperationAction(ISD::FMAXNUM, VT, Custom);
1015 
1016         setOperationAction(ISD::FP_ROUND, VT, Custom);
1017         setOperationAction(ISD::FP_EXTEND, VT, Custom);
1018 
1019         setOperationAction(ISD::FTRUNC, VT, Custom);
1020         setOperationAction(ISD::FCEIL, VT, Custom);
1021         setOperationAction(ISD::FFLOOR, VT, Custom);
1022         setOperationAction(ISD::FROUND, VT, Custom);
1023 
1024         for (auto CC : VFPCCToExpand)
1025           setCondCodeAction(CC, VT, Expand);
1026 
1027         setOperationAction(ISD::VSELECT, VT, Custom);
1028         setOperationAction(ISD::SELECT, VT, Custom);
1029         setOperationAction(ISD::SELECT_CC, VT, Expand);
1030 
1031         setOperationAction(ISD::BITCAST, VT, Custom);
1032 
1033         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1034         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1035         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1036         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1037 
1038         for (unsigned VPOpc : FloatingPointVPOps)
1039           setOperationAction(VPOpc, VT, Custom);
1040       }
1041 
1042       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1043       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1044       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
1045       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
1046       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1047       if (Subtarget.hasStdExtZfh())
1048         setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1049       if (Subtarget.hasStdExtF())
1050         setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1051       if (Subtarget.hasStdExtD())
1052         setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1053     }
1054   }
1055 
1056   // Function alignments.
1057   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
1058   setMinFunctionAlignment(FunctionAlignment);
1059   setPrefFunctionAlignment(FunctionAlignment);
1060 
1061   setMinimumJumpTableEntries(5);
1062 
1063   // Jumps are expensive, compared to logic
1064   setJumpIsExpensive();
1065 
1066   setTargetDAGCombine(ISD::ADD);
1067   setTargetDAGCombine(ISD::SUB);
1068   setTargetDAGCombine(ISD::AND);
1069   setTargetDAGCombine(ISD::OR);
1070   setTargetDAGCombine(ISD::XOR);
1071   setTargetDAGCombine(ISD::ANY_EXTEND);
1072   if (Subtarget.hasStdExtF()) {
1073     setTargetDAGCombine(ISD::ZERO_EXTEND);
1074     setTargetDAGCombine(ISD::FP_TO_SINT);
1075     setTargetDAGCombine(ISD::FP_TO_UINT);
1076     setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
1077     setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
1078   }
1079   if (Subtarget.hasVInstructions()) {
1080     setTargetDAGCombine(ISD::FCOPYSIGN);
1081     setTargetDAGCombine(ISD::MGATHER);
1082     setTargetDAGCombine(ISD::MSCATTER);
1083     setTargetDAGCombine(ISD::VP_GATHER);
1084     setTargetDAGCombine(ISD::VP_SCATTER);
1085     setTargetDAGCombine(ISD::SRA);
1086     setTargetDAGCombine(ISD::SRL);
1087     setTargetDAGCombine(ISD::SHL);
1088     setTargetDAGCombine(ISD::STORE);
1089   }
1090 
1091   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1092   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1093 }
1094 
1095 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1096                                             LLVMContext &Context,
1097                                             EVT VT) const {
1098   if (!VT.isVector())
1099     return getPointerTy(DL);
1100   if (Subtarget.hasVInstructions() &&
1101       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1102     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1103   return VT.changeVectorElementTypeToInteger();
1104 }
1105 
1106 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1107   return Subtarget.getXLenVT();
1108 }
1109 
1110 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1111                                              const CallInst &I,
1112                                              MachineFunction &MF,
1113                                              unsigned Intrinsic) const {
1114   auto &DL = I.getModule()->getDataLayout();
1115   switch (Intrinsic) {
1116   default:
1117     return false;
1118   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1119   case Intrinsic::riscv_masked_atomicrmw_add_i32:
1120   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1121   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1122   case Intrinsic::riscv_masked_atomicrmw_max_i32:
1123   case Intrinsic::riscv_masked_atomicrmw_min_i32:
1124   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1125   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1126   case Intrinsic::riscv_masked_cmpxchg_i32:
1127     Info.opc = ISD::INTRINSIC_W_CHAIN;
1128     Info.memVT = MVT::i32;
1129     Info.ptrVal = I.getArgOperand(0);
1130     Info.offset = 0;
1131     Info.align = Align(4);
1132     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1133                  MachineMemOperand::MOVolatile;
1134     return true;
1135   case Intrinsic::riscv_masked_strided_load:
1136     Info.opc = ISD::INTRINSIC_W_CHAIN;
1137     Info.ptrVal = I.getArgOperand(1);
1138     Info.memVT = getValueType(DL, I.getType()->getScalarType());
1139     Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1140     Info.size = MemoryLocation::UnknownSize;
1141     Info.flags |= MachineMemOperand::MOLoad;
1142     return true;
1143   case Intrinsic::riscv_masked_strided_store:
1144     Info.opc = ISD::INTRINSIC_VOID;
1145     Info.ptrVal = I.getArgOperand(1);
1146     Info.memVT =
1147         getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1148     Info.align = Align(
1149         DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1150         8);
1151     Info.size = MemoryLocation::UnknownSize;
1152     Info.flags |= MachineMemOperand::MOStore;
1153     return true;
1154   }
1155 }
1156 
1157 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1158                                                 const AddrMode &AM, Type *Ty,
1159                                                 unsigned AS,
1160                                                 Instruction *I) const {
1161   // No global is ever allowed as a base.
1162   if (AM.BaseGV)
1163     return false;
1164 
1165   // Require a 12-bit signed offset.
1166   if (!isInt<12>(AM.BaseOffs))
1167     return false;
1168 
1169   switch (AM.Scale) {
1170   case 0: // "r+i" or just "i", depending on HasBaseReg.
1171     break;
1172   case 1:
1173     if (!AM.HasBaseReg) // allow "r+i".
1174       break;
1175     return false; // disallow "r+r" or "r+r+i".
1176   default:
1177     return false;
1178   }
1179 
1180   return true;
1181 }
1182 
1183 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1184   return isInt<12>(Imm);
1185 }
1186 
1187 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1188   return isInt<12>(Imm);
1189 }
1190 
1191 // On RV32, 64-bit integers are split into their high and low parts and held
1192 // in two different registers, so the trunc is free since the low register can
1193 // just be used.
1194 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1195   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1196     return false;
1197   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1198   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1199   return (SrcBits == 64 && DestBits == 32);
1200 }
1201 
1202 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1203   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1204       !SrcVT.isInteger() || !DstVT.isInteger())
1205     return false;
1206   unsigned SrcBits = SrcVT.getSizeInBits();
1207   unsigned DestBits = DstVT.getSizeInBits();
1208   return (SrcBits == 64 && DestBits == 32);
1209 }
1210 
1211 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1212   // Zexts are free if they can be combined with a load.
1213   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1214   // poorly with type legalization of compares preferring sext.
1215   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1216     EVT MemVT = LD->getMemoryVT();
1217     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1218         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1219          LD->getExtensionType() == ISD::ZEXTLOAD))
1220       return true;
1221   }
1222 
1223   return TargetLowering::isZExtFree(Val, VT2);
1224 }
1225 
1226 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1227   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1228 }
1229 
1230 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1231   return Subtarget.hasStdExtZbb();
1232 }
1233 
1234 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1235   return Subtarget.hasStdExtZbb();
1236 }
1237 
1238 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1239   EVT VT = Y.getValueType();
1240 
1241   // FIXME: Support vectors once we have tests.
1242   if (VT.isVector())
1243     return false;
1244 
1245   return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
1246           Subtarget.hasStdExtZbkb()) &&
1247          !isa<ConstantSDNode>(Y);
1248 }
1249 
1250 /// Check if sinking \p I's operands to I's basic block is profitable, because
1251 /// the operands can be folded into a target instruction, e.g.
1252 /// splats of scalars can fold into vector instructions.
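/// For example, a splat formed as a shufflevector of an insertelement at lane
/// 0 that feeds a vector add in another block can be sunk next to the add so
/// that instruction selection can match a vector-scalar (.vx/.vf) form.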
1253 bool RISCVTargetLowering::shouldSinkOperands(
1254     Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1255   using namespace llvm::PatternMatch;
1256 
1257   if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1258     return false;
1259 
1260   auto IsSinker = [&](Instruction *I, int Operand) {
1261     switch (I->getOpcode()) {
1262     case Instruction::Add:
1263     case Instruction::Sub:
1264     case Instruction::Mul:
1265     case Instruction::And:
1266     case Instruction::Or:
1267     case Instruction::Xor:
1268     case Instruction::FAdd:
1269     case Instruction::FSub:
1270     case Instruction::FMul:
1271     case Instruction::FDiv:
1272     case Instruction::ICmp:
1273     case Instruction::FCmp:
1274       return true;
1275     case Instruction::Shl:
1276     case Instruction::LShr:
1277     case Instruction::AShr:
1278     case Instruction::UDiv:
1279     case Instruction::SDiv:
1280     case Instruction::URem:
1281     case Instruction::SRem:
1282       return Operand == 1;
1283     case Instruction::Call:
1284       if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1285         switch (II->getIntrinsicID()) {
1286         case Intrinsic::fma:
1287           return Operand == 0 || Operand == 1;
        // FIXME: Our patterns can only match vx/vf instructions when the splat
        // is on the RHS, because TableGen doesn't recognize our VP operations
        // as commutative.
1291         case Intrinsic::vp_add:
1292         case Intrinsic::vp_mul:
1293         case Intrinsic::vp_and:
1294         case Intrinsic::vp_or:
1295         case Intrinsic::vp_xor:
1296         case Intrinsic::vp_fadd:
1297         case Intrinsic::vp_fmul:
1298         case Intrinsic::vp_shl:
1299         case Intrinsic::vp_lshr:
1300         case Intrinsic::vp_ashr:
1301         case Intrinsic::vp_udiv:
1302         case Intrinsic::vp_sdiv:
1303         case Intrinsic::vp_urem:
1304         case Intrinsic::vp_srem:
1305           return Operand == 1;
1306         // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
1307         // explicit patterns for both LHS and RHS (as 'vr' versions).
1308         case Intrinsic::vp_sub:
1309         case Intrinsic::vp_fsub:
1310         case Intrinsic::vp_fdiv:
1311           return Operand == 0 || Operand == 1;
1312         default:
1313           return false;
1314         }
1315       }
1316       return false;
1317     default:
1318       return false;
1319     }
1320   };
1321 
1322   for (auto OpIdx : enumerate(I->operands())) {
1323     if (!IsSinker(I, OpIdx.index()))
1324       continue;
1325 
1326     Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1327     // Make sure we are not already sinking this operand
1328     if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1329       continue;
1330 
1331     // We are looking for a splat that can be sunk.
1332     if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1333                              m_Undef(), m_ZeroMask())))
1334       continue;
1335 
1336     // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1337     // and vector registers
1338     for (Use &U : Op->uses()) {
1339       Instruction *Insn = cast<Instruction>(U.getUser());
1340       if (!IsSinker(Insn, U.getOperandNo()))
1341         return false;
1342     }
1343 
1344     Ops.push_back(&Op->getOperandUse(0));
1345     Ops.push_back(&OpIdx.value());
1346   }
1347   return true;
1348 }
1349 
1350 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1351                                        bool ForCodeSize) const {
1352   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1353   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1354     return false;
1355   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1356     return false;
1357   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1358     return false;
1359   return Imm.isZero();
1360 }
1361 
1362 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1363   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1364          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1365          (VT == MVT::f64 && Subtarget.hasStdExtD());
1366 }
1367 
1368 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1369                                                       CallingConv::ID CC,
1370                                                       EVT VT) const {
1371   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1372   // We might still end up using a GPR but that will be decided based on ABI.
1373   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1374   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1375     return MVT::f32;
1376 
1377   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1378 }
1379 
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
1383   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1384   // We might still end up using a GPR but that will be decided based on ABI.
1385   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1386   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1387     return 1;
1388 
1389   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1390 }
1391 
1392 // Changes the condition code and swaps operands if necessary, so the SetCC
1393 // operation matches one of the comparisons supported directly by branches
1394 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1395 // with 1/-1.
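// For example, (setgt %a, %b) becomes (setlt %b, %a), which maps onto BLT,
// and (setule %a, %b) becomes (setuge %b, %a), which maps onto BGEU.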
1396 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1397                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1398   // Convert X > -1 to X >= 0.
1399   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1400     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1401     CC = ISD::SETGE;
1402     return;
1403   }
1404   // Convert X < 1 to 0 >= X.
1405   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1406     RHS = LHS;
1407     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1408     CC = ISD::SETGE;
1409     return;
1410   }
1411 
1412   switch (CC) {
1413   default:
1414     break;
1415   case ISD::SETGT:
1416   case ISD::SETLE:
1417   case ISD::SETUGT:
1418   case ISD::SETULE:
1419     CC = ISD::getSetCCSwappedOperands(CC);
1420     std::swap(LHS, RHS);
1421     break;
1422   }
1423 }
1424 
1425 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1426   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1427   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1428   if (VT.getVectorElementType() == MVT::i1)
1429     KnownSize *= 8;
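  // For example, nxv8i1 is scaled up to a known minimum of 64 bits and maps
  // to LMUL_1 below, while nxv4i32 (128 bits) maps to LMUL_2 and nxv1i32
  // (32 bits) maps to LMUL_F2.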
1430 
1431   switch (KnownSize) {
1432   default:
1433     llvm_unreachable("Invalid LMUL.");
1434   case 8:
1435     return RISCVII::VLMUL::LMUL_F8;
1436   case 16:
1437     return RISCVII::VLMUL::LMUL_F4;
1438   case 32:
1439     return RISCVII::VLMUL::LMUL_F2;
1440   case 64:
1441     return RISCVII::VLMUL::LMUL_1;
1442   case 128:
1443     return RISCVII::VLMUL::LMUL_2;
1444   case 256:
1445     return RISCVII::VLMUL::LMUL_4;
1446   case 512:
1447     return RISCVII::VLMUL::LMUL_8;
1448   }
1449 }
1450 
1451 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1452   switch (LMul) {
1453   default:
1454     llvm_unreachable("Invalid LMUL.");
1455   case RISCVII::VLMUL::LMUL_F8:
1456   case RISCVII::VLMUL::LMUL_F4:
1457   case RISCVII::VLMUL::LMUL_F2:
1458   case RISCVII::VLMUL::LMUL_1:
1459     return RISCV::VRRegClassID;
1460   case RISCVII::VLMUL::LMUL_2:
1461     return RISCV::VRM2RegClassID;
1462   case RISCVII::VLMUL::LMUL_4:
1463     return RISCV::VRM4RegClassID;
1464   case RISCVII::VLMUL::LMUL_8:
1465     return RISCV::VRM8RegClassID;
1466   }
1467 }
1468 
1469 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1470   RISCVII::VLMUL LMUL = getLMUL(VT);
1471   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1472       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1473       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1474       LMUL == RISCVII::VLMUL::LMUL_1) {
1475     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1476                   "Unexpected subreg numbering");
1477     return RISCV::sub_vrm1_0 + Index;
1478   }
1479   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1480     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1481                   "Unexpected subreg numbering");
1482     return RISCV::sub_vrm2_0 + Index;
1483   }
1484   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1485     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1486                   "Unexpected subreg numbering");
1487     return RISCV::sub_vrm4_0 + Index;
1488   }
1489   llvm_unreachable("Invalid vector type.");
1490 }
1491 
1492 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1493   if (VT.getVectorElementType() == MVT::i1)
1494     return RISCV::VRRegClassID;
1495   return getRegClassIDForLMUL(getLMUL(VT));
1496 }
1497 
1498 // Attempt to decompose a subvector insert/extract between VecVT and
1499 // SubVecVT via subregister indices. Returns the subregister index that
1500 // can perform the subvector insert/extract with the given element index, as
1501 // well as the index corresponding to any leftover subvectors that must be
1502 // further inserted/extracted within the register class for SubVecVT.
1503 std::pair<unsigned, unsigned>
1504 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1505     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1506     const RISCVRegisterInfo *TRI) {
1507   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1508                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1509                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1510                 "Register classes not ordered");
1511   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1512   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1513   // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
1515   // the LMUL:
1516   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1517   // Note that this is not guaranteed to find a subregister index, such as
1518   // when we are extracting from one VR type to another.
1519   unsigned SubRegIdx = RISCV::NoSubRegister;
1520   for (const unsigned RCID :
1521        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1522     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1523       VecVT = VecVT.getHalfNumVectorElementsVT();
1524       bool IsHi =
1525           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1526       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1527                                             getSubregIndexByMVT(VecVT, IsHi));
1528       if (IsHi)
1529         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1530     }
1531   return {SubRegIdx, InsertExtractIdx};
1532 }
1533 
1534 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1535 // stores for those types.
1536 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1537   return !Subtarget.useRVVForFixedLengthVectors() ||
1538          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1539 }
1540 
1541 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1542   if (ScalarTy->isPointerTy())
1543     return true;
1544 
1545   if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1546       ScalarTy->isIntegerTy(32))
1547     return true;
1548 
1549   if (ScalarTy->isIntegerTy(64))
1550     return Subtarget.hasVInstructionsI64();
1551 
1552   if (ScalarTy->isHalfTy())
1553     return Subtarget.hasVInstructionsF16();
1554   if (ScalarTy->isFloatTy())
1555     return Subtarget.hasVInstructionsF32();
1556   if (ScalarTy->isDoubleTy())
1557     return Subtarget.hasVInstructionsF64();
1558 
1559   return false;
1560 }
1561 
1562 static SDValue getVLOperand(SDValue Op) {
1563   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1564           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1565          "Unexpected opcode");
1566   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1567   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1568   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1569       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1570   if (!II)
1571     return SDValue();
1572   return Op.getOperand(II->VLOperand + 1 + HasChain);
1573 }
1574 
1575 static bool useRVVForFixedLengthVectorVT(MVT VT,
1576                                          const RISCVSubtarget &Subtarget) {
1577   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1578   if (!Subtarget.useRVVForFixedLengthVectors())
1579     return false;
1580 
1581   // We only support a set of vector types with a consistent maximum fixed size
1582   // across all supported vector element types to avoid legalization issues.
1583   // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1584   // fixed-length vector type we support is 1024 bytes.
1585   if (VT.getFixedSizeInBits() > 1024 * 8)
1586     return false;
1587 
1588   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1589 
1590   MVT EltVT = VT.getVectorElementType();
1591 
1592   // Don't use RVV for vectors we cannot scalarize if required.
1593   switch (EltVT.SimpleTy) {
1594   // i1 is supported but has different rules.
1595   default:
1596     return false;
1597   case MVT::i1:
1598     // Masks can only use a single register.
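    // For example, with a 128-bit MinVLen this check allows masks up to
    // v128i1 and rejects anything wider.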
1599     if (VT.getVectorNumElements() > MinVLen)
1600       return false;
1601     MinVLen /= 8;
1602     break;
1603   case MVT::i8:
1604   case MVT::i16:
1605   case MVT::i32:
1606     break;
1607   case MVT::i64:
1608     if (!Subtarget.hasVInstructionsI64())
1609       return false;
1610     break;
1611   case MVT::f16:
1612     if (!Subtarget.hasVInstructionsF16())
1613       return false;
1614     break;
1615   case MVT::f32:
1616     if (!Subtarget.hasVInstructionsF32())
1617       return false;
1618     break;
1619   case MVT::f64:
1620     if (!Subtarget.hasVInstructionsF64())
1621       return false;
1622     break;
1623   }
1624 
1625   // Reject elements larger than ELEN.
1626   if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1627     return false;
1628 
1629   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1630   // Don't use RVV for types that don't fit.
1631   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1632     return false;
1633 
1634   // TODO: Perhaps an artificial restriction, but worth having whilst getting
1635   // the base fixed length RVV support in place.
1636   if (!VT.isPow2VectorType())
1637     return false;
1638 
1639   return true;
1640 }
1641 
1642 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1643   return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1644 }
1645 
// Return the RVV container type for a fixed-length vector type VT: the
// smallest scalable type with VT's element type that can hold VT at MinVLen.
1647 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
1648                                             const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are set up.
1650   assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1651           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1652          "Expected legal fixed length vector!");
1653 
1654   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1655   unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1656 
1657   MVT EltVT = VT.getVectorElementType();
1658   switch (EltVT.SimpleTy) {
1659   default:
1660     llvm_unreachable("unexpected element type for RVV container");
1661   case MVT::i1:
1662   case MVT::i8:
1663   case MVT::i16:
1664   case MVT::i32:
1665   case MVT::i64:
1666   case MVT::f16:
1667   case MVT::f32:
1668   case MVT::f64: {
1669     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1670     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1671     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
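    // For example, assuming MinVLen=128 and ELEN=64, the 128-bit types v4i32
    // and v8i16 map to the LMUL=1 containers nxv2i32 and nxv4i16, while the
    // narrower v2i32 maps to the fractional container nxv1i32.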
1672     unsigned NumElts =
1673         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1674     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1675     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1676     return MVT::getScalableVectorVT(EltVT, NumElts);
1677   }
1678   }
1679 }
1680 
1681 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1682                                             const RISCVSubtarget &Subtarget) {
1683   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1684                                           Subtarget);
1685 }
1686 
1687 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1688   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
1689 }
1690 
1691 // Grow V to consume an entire RVV register.
1692 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1693                                        const RISCVSubtarget &Subtarget) {
1694   assert(VT.isScalableVector() &&
1695          "Expected to convert into a scalable vector!");
1696   assert(V.getValueType().isFixedLengthVector() &&
1697          "Expected a fixed length vector operand!");
1698   SDLoc DL(V);
1699   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1700   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1701 }
1702 
1703 // Shrink V so it's just big enough to maintain a VT's worth of data.
1704 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1705                                          const RISCVSubtarget &Subtarget) {
1706   assert(VT.isFixedLengthVector() &&
1707          "Expected to convert into a fixed length vector!");
1708   assert(V.getValueType().isScalableVector() &&
1709          "Expected a scalable vector operand!");
1710   SDLoc DL(V);
1711   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1712   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1713 }
1714 
1715 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1716 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1717 // the vector type that it is contained in.
1718 static std::pair<SDValue, SDValue>
1719 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1720                 const RISCVSubtarget &Subtarget) {
1721   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1722   MVT XLenVT = Subtarget.getXLenVT();
1723   SDValue VL = VecVT.isFixedLengthVector()
1724                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1725                    : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1726   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1727   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1728   return {Mask, VL};
1729 }
1730 
1731 // As above but assuming the given type is a scalable vector type.
1732 static std::pair<SDValue, SDValue>
1733 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1734                         const RISCVSubtarget &Subtarget) {
1735   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1736   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1737 }
1738 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
// little of either is (currently) supported. This can get us into an infinite
// loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a
// BUILD_VECTOR as a ..., etc.
1743 // Until either (or both) of these can reliably lower any node, reporting that
1744 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1745 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1746 // which is not desirable.
1747 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1748     EVT VT, unsigned DefinedValues) const {
1749   return false;
1750 }
1751 
1752 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1753                                   const RISCVSubtarget &Subtarget) {
1754   // RISCV FP-to-int conversions saturate to the destination register size, but
1755   // don't produce 0 for nan. We can use a conversion instruction and fix the
1756   // nan case with a compare and a select.
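  // For example, fcvt.w.s returns the maximum signed 32-bit value for a NaN
  // input, so the unordered self-compare below detects NaN and selects 0 as
  // the saturating conversions require.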
1757   SDValue Src = Op.getOperand(0);
1758 
1759   EVT DstVT = Op.getValueType();
1760   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1761 
1762   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1763   unsigned Opc;
1764   if (SatVT == DstVT)
1765     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1766   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1767     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1768   else
1769     return SDValue();
1770   // FIXME: Support other SatVTs by clamping before or after the conversion.
1771 
1772   SDLoc DL(Op);
1773   SDValue FpToInt = DAG.getNode(
1774       Opc, DL, DstVT, Src,
1775       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1776 
1777   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1778   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1779 }
1780 
1781 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
// and back, taking care to avoid converting values that are nan or already
1783 // correct.
1784 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1785 // have FRM dependencies modeled yet.
1786 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
1787   MVT VT = Op.getSimpleValueType();
1788   assert(VT.isVector() && "Unexpected type");
1789 
1790   SDLoc DL(Op);
1791 
1792   // Freeze the source since we are increasing the number of uses.
1793   SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
1794 
1795   // Truncate to integer and convert back to FP.
1796   MVT IntVT = VT.changeVectorElementTypeToInteger();
1797   SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
1798   Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1799 
1800   MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1801 
1802   if (Op.getOpcode() == ISD::FCEIL) {
    // If the truncated value is greater than or equal to the original
1804     // value, we've computed the ceil. Otherwise, we went the wrong way and
1805     // need to increase by 1.
1806     // FIXME: This should use a masked operation. Handle here or in isel?
1807     SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
1808                                  DAG.getConstantFP(1.0, DL, VT));
1809     SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
1810     Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1811   } else if (Op.getOpcode() == ISD::FFLOOR) {
    // If the truncated value is less than or equal to the original value,
1813     // we've computed the floor. Otherwise, we went the wrong way and need to
1814     // decrease by 1.
1815     // FIXME: This should use a masked operation. Handle here or in isel?
1816     SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
1817                                  DAG.getConstantFP(1.0, DL, VT));
1818     SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
1819     Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1820   }
1821 
1822   // Restore the original sign so that -0.0 is preserved.
1823   Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1824 
1825   // Determine the largest integer that can be represented exactly. This and
1826   // values larger than it don't have any fractional bits so don't need to
1827   // be converted.
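  // For example, f32 has 24 bits of precision, so MaxVal is 2^23; every f32
  // whose magnitude is at least 2^23 is already an integer.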
1828   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1829   unsigned Precision = APFloat::semanticsPrecision(FltSem);
1830   APFloat MaxVal = APFloat(FltSem);
1831   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1832                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1833   SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1834 
1835   // If abs(Src) was larger than MaxVal or nan, keep it.
1836   SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1837   SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1838   return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1839 }
1840 
1841 // ISD::FROUND is defined to round to nearest with ties rounding away from 0.
1842 // This mode isn't supported in vector hardware on RISCV. But as long as we
1843 // aren't compiling with trapping math, we can emulate this with
1844 // floor(X + copysign(nextafter(0.5, 0.0), X)).
1845 // FIXME: Could be shorter by changing rounding mode, but we don't have FRM
1846 // dependencies modeled yet.
1847 // FIXME: Use masked operations to avoid final merge.
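// As an illustrative f32 example, the adjustment is nextafter(0.5f, 0.0f),
// i.e. 0.5 - 2^-25: adding it to 2.5 rounds up to 3.0 (which truncates to 3),
// while adding it to the largest f32 below 2.5 stays below 3.0 and still
// truncates to 2.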
1848 static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
1849   MVT VT = Op.getSimpleValueType();
1850   assert(VT.isVector() && "Unexpected type");
1851 
1852   SDLoc DL(Op);
1853 
1854   // Freeze the source since we are increasing the number of uses.
1855   SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
1856 
1857   // We do the conversion on the absolute value and fix the sign at the end.
1858   SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1859 
1860   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1861   bool Ignored;
1862   APFloat Point5Pred = APFloat(0.5f);
1863   Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
1864   Point5Pred.next(/*nextDown*/ true);
1865 
1866   // Add the adjustment.
1867   SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
1868                                DAG.getConstantFP(Point5Pred, DL, VT));
1869 
1870   // Truncate to integer and convert back to fp.
1871   MVT IntVT = VT.changeVectorElementTypeToInteger();
1872   SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
1873   Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1874 
1875   // Restore the original sign.
1876   Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1877 
1878   // Determine the largest integer that can be represented exactly. This and
1879   // values larger than it don't have any fractional bits so don't need to
1880   // be converted.
1881   unsigned Precision = APFloat::semanticsPrecision(FltSem);
1882   APFloat MaxVal = APFloat(FltSem);
1883   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1884                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1885   SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1886 
1887   // If abs(Src) was larger than MaxVal or nan, keep it.
1888   MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1889   SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1890   return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1891 }
1892 
1893 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1894                                  const RISCVSubtarget &Subtarget) {
1895   MVT VT = Op.getSimpleValueType();
1896   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1897 
1898   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1899 
1900   SDLoc DL(Op);
1901   SDValue Mask, VL;
1902   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1903 
1904   unsigned Opc =
1905       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1906   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1907   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1908 }
1909 
1910 struct VIDSequence {
1911   int64_t StepNumerator;
1912   unsigned StepDenominator;
1913   int64_t Addend;
1914 };
1915 
1916 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1917 // to the (non-zero) step S and start value X. This can be then lowered as the
1918 // RVV sequence (VID * S) + X, for example.
1919 // The step S is represented as an integer numerator divided by a positive
1920 // denominator. Note that the implementation currently only identifies
1921 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1922 // cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>; it is left to the caller to
// determine whether this is worth generating code for.
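// For example, <2,4,6,8> is matched as step 2/1 with addend 2, while
// <0,0,1,1> is matched as the fractional step 1/2 with addend 0.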
1926 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
1927   unsigned NumElts = Op.getNumOperands();
1928   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1929   if (!Op.getValueType().isInteger())
1930     return None;
1931 
1932   Optional<unsigned> SeqStepDenom;
1933   Optional<int64_t> SeqStepNum, SeqAddend;
1934   Optional<std::pair<uint64_t, unsigned>> PrevElt;
1935   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1936   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1937     // Assume undef elements match the sequence; we just have to be careful
1938     // when interpolating across them.
1939     if (Op.getOperand(Idx).isUndef())
1940       continue;
1941     // The BUILD_VECTOR must be all constants.
1942     if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1943       return None;
1944 
1945     uint64_t Val = Op.getConstantOperandVal(Idx) &
1946                    maskTrailingOnes<uint64_t>(EltSizeInBits);
1947 
1948     if (PrevElt) {
1949       // Calculate the step since the last non-undef element, and ensure
1950       // it's consistent across the entire sequence.
1951       unsigned IdxDiff = Idx - PrevElt->second;
1952       int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1953 
      // A zero value difference means that we're somewhere in the middle
1955       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1956       // step change before evaluating the sequence.
1957       if (ValDiff != 0) {
1958         int64_t Remainder = ValDiff % IdxDiff;
1959         // Normalize the step if it's greater than 1.
1960         if (Remainder != ValDiff) {
1961           // The difference must cleanly divide the element span.
1962           if (Remainder != 0)
1963             return None;
1964           ValDiff /= IdxDiff;
1965           IdxDiff = 1;
1966         }
1967 
1968         if (!SeqStepNum)
1969           SeqStepNum = ValDiff;
1970         else if (ValDiff != SeqStepNum)
1971           return None;
1972 
1973         if (!SeqStepDenom)
1974           SeqStepDenom = IdxDiff;
1975         else if (IdxDiff != *SeqStepDenom)
1976           return None;
1977       }
1978     }
1979 
1980     // Record and/or check any addend.
1981     if (SeqStepNum && SeqStepDenom) {
1982       uint64_t ExpectedVal =
1983           (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1984       int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1985       if (!SeqAddend)
1986         SeqAddend = Addend;
1987       else if (SeqAddend != Addend)
1988         return None;
1989     }
1990 
1991     // Record this non-undef element for later.
1992     if (!PrevElt || PrevElt->first != Val)
1993       PrevElt = std::make_pair(Val, Idx);
1994   }
1995   // We need to have logged both a step and an addend for this to count as
1996   // a legal index sequence.
1997   if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1998     return None;
1999 
2000   return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2001 }
2002 
2003 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2004                                  const RISCVSubtarget &Subtarget) {
2005   MVT VT = Op.getSimpleValueType();
2006   assert(VT.isFixedLengthVector() && "Unexpected vector!");
2007 
2008   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2009 
2010   SDLoc DL(Op);
2011   SDValue Mask, VL;
2012   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2013 
2014   MVT XLenVT = Subtarget.getXLenVT();
2015   unsigned NumElts = Op.getNumOperands();
2016 
2017   if (VT.getVectorElementType() == MVT::i1) {
2018     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2019       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2020       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2021     }
2022 
2023     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2024       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2025       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
2026     }
2027 
2028     // Lower constant mask BUILD_VECTORs via an integer vector type, in
2029     // scalar integer chunks whose bit-width depends on the number of mask
2030     // bits and XLEN.
2031     // First, determine the most appropriate scalar integer type to use. This
2032     // is at most XLenVT, but may be shrunk to a smaller vector element type
2033     // according to the size of the final vector - use i8 chunks rather than
2034     // XLenVT if we're producing a v8i1. This results in more consistent
2035     // codegen across RV32 and RV64.
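    // For example, a constant v16i1 mask is built as a single i16 element
    // (a v1i16 vector) and bitcast back to v16i1, while a v4i1 mask goes via
    // v1i8 followed by an EXTRACT_SUBVECTOR of the v8i1 bitcast.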
2036     unsigned NumViaIntegerBits =
2037         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
2038     NumViaIntegerBits = std::min(NumViaIntegerBits,
2039                                  Subtarget.getMaxELENForFixedLengthVectors());
2040     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2041       // If we have to use more than one INSERT_VECTOR_ELT then this
      // optimization is likely to increase code size; avoid performing it in
2043       // such a case. We can use a load from a constant pool in this case.
2044       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2045         return SDValue();
2046       // Now we can create our integer vector type. Note that it may be larger
2047       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2048       MVT IntegerViaVecVT =
2049           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2050                            divideCeil(NumElts, NumViaIntegerBits));
2051 
2052       uint64_t Bits = 0;
2053       unsigned BitPos = 0, IntegerEltIdx = 0;
2054       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2055 
2056       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2057         // Once we accumulate enough bits to fill our scalar type, insert into
2058         // our vector and clear our accumulated data.
2059         if (I != 0 && I % NumViaIntegerBits == 0) {
2060           if (NumViaIntegerBits <= 32)
2061             Bits = SignExtend64(Bits, 32);
2062           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2063           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2064                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2065           Bits = 0;
2066           BitPos = 0;
2067           IntegerEltIdx++;
2068         }
2069         SDValue V = Op.getOperand(I);
2070         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2071         Bits |= ((uint64_t)BitValue << BitPos);
2072       }
2073 
2074       // Insert the (remaining) scalar value into position in our integer
2075       // vector type.
2076       if (NumViaIntegerBits <= 32)
2077         Bits = SignExtend64(Bits, 32);
2078       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2079       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2080                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2081 
2082       if (NumElts < NumViaIntegerBits) {
2083         // If we're producing a smaller vector than our minimum legal integer
2084         // type, bitcast to the equivalent (known-legal) mask type, and extract
2085         // our final mask.
2086         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2087         Vec = DAG.getBitcast(MVT::v8i1, Vec);
2088         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2089                           DAG.getConstant(0, DL, XLenVT));
2090       } else {
2091         // Else we must have produced an integer type with the same size as the
2092         // mask type; bitcast for the final result.
2093         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2094         Vec = DAG.getBitcast(VT, Vec);
2095       }
2096 
2097       return Vec;
2098     }
2099 
2100     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2101     // vector type, we have a legal equivalently-sized i8 type, so we can use
2102     // that.
2103     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2104     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2105 
2106     SDValue WideVec;
2107     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2108       // For a splat, perform a scalar truncate before creating the wider
2109       // vector.
2110       assert(Splat.getValueType() == XLenVT &&
2111              "Unexpected type for i1 splat value");
2112       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2113                           DAG.getConstant(1, DL, XLenVT));
2114       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2115     } else {
2116       SmallVector<SDValue, 8> Ops(Op->op_values());
2117       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2118       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2119       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2120     }
2121 
2122     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2123   }
2124 
2125   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2126     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2127                                         : RISCVISD::VMV_V_X_VL;
2128     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
2129     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2130   }
2131 
2132   // Try and match index sequences, which we can lower to the vid instruction
2133   // with optional modifications. An all-undef vector is matched by
2134   // getSplatValue, above.
2135   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2136     int64_t StepNumerator = SimpleVID->StepNumerator;
2137     unsigned StepDenominator = SimpleVID->StepDenominator;
2138     int64_t Addend = SimpleVID->Addend;
2139 
2140     assert(StepNumerator != 0 && "Invalid step");
2141     bool Negate = false;
2142     int64_t SplatStepVal = StepNumerator;
2143     unsigned StepOpcode = ISD::MUL;
2144     if (StepNumerator != 1) {
2145       if (isPowerOf2_64(std::abs(StepNumerator))) {
2146         Negate = StepNumerator < 0;
2147         StepOpcode = ISD::SHL;
2148         SplatStepVal = Log2_64(std::abs(StepNumerator));
2149       }
2150     }
2151 
    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction, so ensure the multiply constant can be
    // materialized with a single addi instruction.
2156     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2157          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2158         isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
2159       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2160       // Convert right out of the scalable type so we can use standard ISD
2161       // nodes for the rest of the computation. If we used scalable types with
2162       // these, we'd lose the fixed-length vector info and generate worse
2163       // vsetvli code.
2164       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2165       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2166           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2167         SDValue SplatStep = DAG.getSplatVector(
2168             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2169         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2170       }
2171       if (StepDenominator != 1) {
2172         SDValue SplatStep = DAG.getSplatVector(
2173             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2174         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2175       }
2176       if (Addend != 0 || Negate) {
2177         SDValue SplatAddend =
2178             DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend,
                          VID);
2180       }
2181       return VID;
2182     }
2183   }
2184 
2185   // Attempt to detect "hidden" splats, which only reveal themselves as splats
2186   // when re-interpreted as a vector with a larger element type. For example,
2187   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2188   // could be instead splat as
2189   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
2190   // TODO: This optimization could also work on non-constant splats, but it
2191   // would require bit-manipulation instructions to construct the splat value.
2192   SmallVector<SDValue> Sequence;
2193   unsigned EltBitSize = VT.getScalarSizeInBits();
2194   const auto *BV = cast<BuildVectorSDNode>(Op);
2195   if (VT.isInteger() && EltBitSize < 64 &&
2196       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2197       BV->getRepeatedSequence(Sequence) &&
2198       (Sequence.size() * EltBitSize) <= 64) {
2199     unsigned SeqLen = Sequence.size();
2200     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2201     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2202     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2203             ViaIntVT == MVT::i64) &&
2204            "Unexpected sequence type");
2205 
2206     unsigned EltIdx = 0;
2207     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2208     uint64_t SplatValue = 0;
2209     // Construct the amalgamated value which can be splatted as this larger
2210     // vector type.
2211     for (const auto &SeqV : Sequence) {
2212       if (!SeqV.isUndef())
2213         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2214                        << (EltIdx * EltBitSize));
2215       EltIdx++;
2216     }
2217 
2218     // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
2220     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2221       SplatValue = SignExtend64(SplatValue, 32);
2222 
2223     // Since we can't introduce illegal i64 types at this stage, we can only
2224     // perform an i64 splat on RV32 if it is its own sign-extended value. That
2225     // way we can use RVV instructions to splat.
2226     assert((ViaIntVT.bitsLE(XLenVT) ||
2227             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2228            "Unexpected bitcast sequence");
2229     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2230       SDValue ViaVL =
2231           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2232       MVT ViaContainerVT =
2233           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2234       SDValue Splat =
2235           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2236                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2237       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2238       return DAG.getBitcast(VT, Splat);
2239     }
2240   }
2241 
2242   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2243   // which constitute a large proportion of the elements. In such cases we can
2244   // splat a vector with the dominant element and make up the shortfall with
2245   // INSERT_VECTOR_ELTs.
2246   // Note that this includes vectors of 2 elements by association. The
2247   // upper-most element is the "dominant" one, allowing us to use a splat to
2248   // "insert" the upper element, and an insert of the lower element at position
2249   // 0, which improves codegen.
2250   SDValue DominantValue;
2251   unsigned MostCommonCount = 0;
2252   DenseMap<SDValue, unsigned> ValueCounts;
2253   unsigned NumUndefElts =
2254       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2255 
2256   // Track the number of scalar loads we know we'd be inserting, estimated as
2257   // any non-zero floating-point constant. Other kinds of element are either
2258   // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materializations and
2260   // vector-insertion instructions is not known.
2261   unsigned NumScalarLoads = 0;
2262 
2263   for (SDValue V : Op->op_values()) {
2264     if (V.isUndef())
2265       continue;
2266 
2267     ValueCounts.insert(std::make_pair(V, 0));
2268     unsigned &Count = ValueCounts[V];
2269 
2270     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2271       NumScalarLoads += !CFP->isExactlyValue(+0.0);
2272 
2273     // Is this value dominant? In case of a tie, prefer the highest element as
2274     // it's cheaper to insert near the beginning of a vector than it is at the
2275     // end.
2276     if (++Count >= MostCommonCount) {
2277       DominantValue = V;
2278       MostCommonCount = Count;
2279     }
2280   }
2281 
2282   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2283   unsigned NumDefElts = NumElts - NumUndefElts;
2284   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2285 
2286   // Don't perform this optimization when optimizing for size, since
2287   // materializing elements and inserting them tends to cause code bloat.
2288   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2289       ((MostCommonCount > DominantValueCountThreshold) ||
2290        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2291     // Start by splatting the most common element.
2292     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2293 
2294     DenseSet<SDValue> Processed{DominantValue};
2295     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2296     for (const auto &OpIdx : enumerate(Op->ops())) {
2297       const SDValue &V = OpIdx.value();
2298       if (V.isUndef() || !Processed.insert(V).second)
2299         continue;
2300       if (ValueCounts[V] == 1) {
2301         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2302                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
2303       } else {
2304         // Blend in all instances of this value using a VSELECT, using a
2305         // mask where each bit signals whether that element is the one
2306         // we're after.
2307         SmallVector<SDValue> Ops;
2308         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2309           return DAG.getConstant(V == V1, DL, XLenVT);
2310         });
2311         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2312                           DAG.getBuildVector(SelMaskTy, DL, Ops),
2313                           DAG.getSplatBuildVector(VT, DL, V), Vec);
2314       }
2315     }
2316 
2317     return Vec;
2318   }
2319 
2320   return SDValue();
2321 }
2322 
2323 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
2324                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
2325   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2326     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2327     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2328     // If Hi constant is all the same sign bit as Lo, lower this as a custom
2329     // node in order to try and match RVV vector/scalar instructions.
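    // For example, the i64 splat value 0xFFFFFFFF80000000 has Lo = 0x80000000
    // and Hi = -1, which is exactly Lo's sign-extension, so a single vmv.v.x
    // of Lo (sign-extended to SEW=64) produces the full element.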
2330     if ((LoC >> 31) == HiC)
2331       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2332 
    // If vl is equal to XLEN_MAX and the Hi constant is equal to Lo, we can
    // use a vmv.v.x with EEW=32 to lower it.
2335     auto *Const = dyn_cast<ConstantSDNode>(VL);
2336     if (LoC == HiC && Const && Const->isAllOnesValue() &&
2337         Const->getOpcode() != ISD::TargetConstant) {
2338       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
      // TODO: We could also do this when vl <= min(VLMAX), but the subtarget
      // is not accessible from here.
2341       auto InterVec = DAG.getNode(
2342           RISCVISD::VMV_V_X_VL, DL, InterVT, Lo,
2343           DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i32));
2344       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2345     }
2346   }
2347 
2348   // Fall back to a stack store and stride x0 vector load.
2349   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
2350 }
2351 
2352 // Called by type legalization to handle splat of i64 on RV32.
2353 // FIXME: We can optimize this when the type has sign or zero bits in one
2354 // of the halves.
2355 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2356                                    SDValue VL, SelectionDAG &DAG) {
2357   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2358   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2359                            DAG.getConstant(0, DL, MVT::i32));
2360   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2361                            DAG.getConstant(1, DL, MVT::i32));
2362   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2363 }
2364 
2365 // This function lowers a splat of a scalar operand Splat with the vector
2366 // length VL. It ensures the final sequence is type legal, which is useful when
2367 // lowering a splat after type legalization.
2368 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
2369                                 SelectionDAG &DAG,
2370                                 const RISCVSubtarget &Subtarget) {
2371   if (VT.isFloatingPoint()) {
2372     // If VL is 1, we could use vfmv.s.f.
2373     if (isOneConstant(VL))
2374       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
2375                          Scalar, VL);
2376     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
2377   }
2378 
2379   MVT XLenVT = Subtarget.getXLenVT();
2380 
2381   // Simplest case is that the operand needs to be promoted to XLenVT.
2382   if (Scalar.getValueType().bitsLE(XLenVT)) {
2383     // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
2386     // FIXME: Should we ignore the upper bits in isel instead?
2387     unsigned ExtOpc =
2388         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2389     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2390     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2391     // If VL is 1 and the scalar value won't benefit from immediate, we could
2392     // use vmv.s.x.
2393     if (isOneConstant(VL) &&
2394         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2395       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
2396                          VL);
2397     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
2398   }
2399 
2400   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2401          "Unexpected scalar for splat lowering!");
2402 
2403   if (isOneConstant(VL) && isNullConstant(Scalar))
2404     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
2405                        DAG.getConstant(0, DL, XLenVT), VL);
2406 
2407   // Otherwise use the more complicated splatting algorithm.
2408   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2409 }
2410 
// Is the mask a slidedown that shifts in undefs? Returns the shift amount if
// so, or -1 if the mask does not match.
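// For example, for a 4-element shuffle the mask <2,3,-1,-1> is a slidedown by
// 2: the defined elements are shifted down and undefs are shifted in on top.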
2412 static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
2413   int Size = Mask.size();
2414 
2415   // Elements shifted in should be undef.
2416   auto CheckUndefs = [&](int Shift) {
2417     for (int i = Size - Shift; i != Size; ++i)
2418       if (Mask[i] >= 0)
2419         return false;
2420     return true;
2421   };
2422 
2423   // Elements should be shifted or undef.
2424   auto MatchShift = [&](int Shift) {
2425     for (int i = 0; i != Size - Shift; ++i)
      if (Mask[i] >= 0 && Mask[i] != Shift + i)
        return false;
2428     return true;
2429   };
2430 
2431   // Try all possible shifts.
2432   for (int Shift = 1; Shift != Size; ++Shift)
2433     if (CheckUndefs(Shift) && MatchShift(Shift))
2434       return Shift;
2435 
2436   // No match.
2437   return -1;
2438 }
2439 
2440 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2441                                 const RISCVSubtarget &Subtarget) {
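  // This matches masks which interleave the low halves of two source vectors:
  // even result lanes take successive elements from one source and odd lanes
  // from the other, e.g. <0,4,1,5> for a 4-element shuffle. SwapSources is
  // set when the even lanes come from the second source operand.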
2442   // We need to be able to widen elements to the next larger integer type.
2443   if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
2444     return false;
2445 
2446   int Size = Mask.size();
2447   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2448 
2449   int Srcs[] = {-1, -1};
2450   for (int i = 0; i != Size; ++i) {
2451     // Ignore undef elements.
2452     if (Mask[i] < 0)
2453       continue;
2454 
    // Is this an even or odd element?
2456     int Pol = i % 2;
2457 
2458     // Ensure we consistently use the same source for this element polarity.
2459     int Src = Mask[i] / Size;
2460     if (Srcs[Pol] < 0)
2461       Srcs[Pol] = Src;
2462     if (Srcs[Pol] != Src)
2463       return false;
2464 
2465     // Make sure the element within the source is appropriate for this element
2466     // in the destination.
2467     int Elt = Mask[i] % Size;
2468     if (Elt != i / 2)
2469       return false;
2470   }
2471 
2472   // We need to find a source for each polarity and they can't be the same.
2473   if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2474     return false;
2475 
2476   // Swap the sources if the second source was in the even polarity.
2477   SwapSources = Srcs[0] > Srcs[1];
2478 
2479   return true;
2480 }
2481 
2482 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2483                                    const RISCVSubtarget &Subtarget) {
2484   SDValue V1 = Op.getOperand(0);
2485   SDValue V2 = Op.getOperand(1);
2486   SDLoc DL(Op);
2487   MVT XLenVT = Subtarget.getXLenVT();
2488   MVT VT = Op.getSimpleValueType();
2489   unsigned NumElts = VT.getVectorNumElements();
2490   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2491 
2492   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2493 
2494   SDValue TrueMask, VL;
2495   std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2496 
2497   if (SVN->isSplat()) {
2498     const int Lane = SVN->getSplatIndex();
2499     if (Lane >= 0) {
2500       MVT SVT = VT.getVectorElementType();
2501 
2502       // Turn splatted vector load into a strided load with an X0 stride.
2503       SDValue V = V1;
2504       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2505       // with undef.
2506       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2507       int Offset = Lane;
2508       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2509         int OpElements =
2510             V.getOperand(0).getSimpleValueType().getVectorNumElements();
2511         V = V.getOperand(Offset / OpElements);
2512         Offset %= OpElements;
2513       }
2514 
2515       // We need to ensure the load isn't atomic or volatile.
2516       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2517         auto *Ld = cast<LoadSDNode>(V);
2518         Offset *= SVT.getStoreSize();
2519         SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2520                                                    TypeSize::Fixed(Offset), DL);
2521 
2522         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2523         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2524           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2525           SDValue IntID =
2526               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2527           SDValue Ops[] = {Ld->getChain(),
2528                            IntID,
2529                            DAG.getUNDEF(ContainerVT),
2530                            NewAddr,
2531                            DAG.getRegister(RISCV::X0, XLenVT),
2532                            VL};
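               // Passing x0 as the stride makes the effective stride zero, so
               // every element is loaded from NewAddr; this broadcasts the
               // 64-bit scalar without materialising it in GPRs on RV32.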
2533           SDValue NewLoad = DAG.getMemIntrinsicNode(
2534               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2535               DAG.getMachineFunction().getMachineMemOperand(
2536                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2537           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2538           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2539         }
2540 
2541         // Otherwise use a scalar load and splat. This will give the best
2542         // opportunity to fold a splat into the operation. ISel can turn it into
2543         // the x0 strided load if we aren't able to fold away the select.
2544         if (SVT.isFloatingPoint())
2545           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2546                           Ld->getPointerInfo().getWithOffset(Offset),
2547                           Ld->getOriginalAlign(),
2548                           Ld->getMemOperand()->getFlags());
2549         else
2550           V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2551                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
2552                              Ld->getOriginalAlign(),
2553                              Ld->getMemOperand()->getFlags());
2554         DAG.makeEquivalentMemoryOrdering(Ld, V);
2555 
2556         unsigned Opc =
2557             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2558         SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2559         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2560       }
2561 
2562       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2563       assert(Lane < (int)NumElts && "Unexpected lane!");
2564       SDValue Gather =
2565           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2566                       DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2567       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2568     }
2569   }
2570 
2571   ArrayRef<int> Mask = SVN->getMask();
2572 
2573   // Try to match as a slidedown.
2574   int SlideAmt = matchShuffleAsSlideDown(Mask);
2575   if (SlideAmt >= 0) {
2576     // TODO: Should we reduce the VL to account for the upper undef elements?
2577     // Requires additional vsetvlis, but might be faster to execute.
2578     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2579     SDValue SlideDown =
2580         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2581                     DAG.getUNDEF(ContainerVT), V1,
2582                     DAG.getConstant(SlideAmt, DL, XLenVT),
2583                     TrueMask, VL);
2584     return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
2585   }
2586 
2587   // Detect an interleave shuffle and lower to
2588   // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
2589   bool SwapSources;
2590   if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2591     // Swap sources if needed.
2592     if (SwapSources)
2593       std::swap(V1, V2);
2594 
2595     // Extract the lower half of the vectors.
2596     MVT HalfVT = VT.getHalfNumVectorElementsVT();
2597     V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2598                      DAG.getConstant(0, DL, XLenVT));
2599     V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2600                      DAG.getConstant(0, DL, XLenVT));
2601 
2602     // Double the element width and halve the number of elements in an int type.
2603     unsigned EltBits = VT.getScalarSizeInBits();
2604     MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2605     MVT WideIntVT =
2606         MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2607     // Convert this to a scalable vector. We need to base this on the
2608     // destination size to ensure there's always a type with a smaller LMUL.
2609     MVT WideIntContainerVT =
2610         getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2611 
2612     // Convert sources to scalable vectors with the same element count as the
2613     // larger type.
2614     MVT HalfContainerVT = MVT::getVectorVT(
2615         VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2616     V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2617     V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2618 
2619     // Cast sources to integer.
2620     MVT IntEltVT = MVT::getIntegerVT(EltBits);
2621     MVT IntHalfVT =
2622         MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2623     V1 = DAG.getBitcast(IntHalfVT, V1);
2624     V2 = DAG.getBitcast(IntHalfVT, V2);
2625 
2626     // Freeze V2 since we use it twice and we need to be sure that the add and
2627     // multiply see the same value.
2628     V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
2629 
2630     // Recreate TrueMask using the widened type's element count.
2631     MVT MaskVT =
2632         MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
2633     TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2634 
2635     // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2636     SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
2637                               V2, TrueMask, VL);
2638     // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2639     SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2640                                      DAG.getAllOnesConstant(DL, XLenVT));
2641     SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
2642                                    V2, Multiplier, TrueMask, VL);
2643     // Add the new copies to our previous addition giving us 2^eltbits copies of
2644     // V2. This is equivalent to shifting V2 left by eltbits. This should
2645     // combine with the vwmulu.vx above to form vwmaccu.vx.
2646     Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2647                       TrueMask, VL);
2648     // Cast back to ContainerVT. We need to re-create ContainerVT in case
2649     // WideIntContainerVT uses a larger fractional LMUL than is implied by the
2650     // fixed vector VT.
2651     ContainerVT =
2652         MVT::getVectorVT(VT.getVectorElementType(),
2653                          WideIntContainerVT.getVectorElementCount() * 2);
2654     Add = DAG.getBitcast(ContainerVT, Add);
2655     return convertFromScalableVector(VT, Add, DAG, Subtarget);
2656   }
2657 
2658   // Detect shuffles which can be re-expressed as vector selects; these are
2659   // shuffles in which each element in the destination is taken from an element
2660   // at the corresponding index in either source vector.
2661   bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2662     int MaskIndex = MaskIdx.value();
2663     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2664   });
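       // e.g. with 4-element sources, the mask <0, 5, 2, 7> is a select: each
       // lane comes from the same lane of either V1 (indices 0, 2) or V2
       // (indices 5, 7).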
2665 
2666   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2667 
2668   SmallVector<SDValue> MaskVals;
2669   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2670   // merged with a second vrgather.
2671   SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2672 
2673   // By default we preserve the original operand order, and use a mask to
2674   // select LHS as true and RHS as false. However, since RVV vector selects may
2675   // feature splats but only on the LHS, we may choose to invert our mask and
2676   // instead select between RHS and LHS.
2677   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2678   bool InvertMask = IsSelect == SwapOps;
2679 
2680   // Keep track of which non-undef indices are used by each LHS/RHS shuffle
2681   // half.
2682   DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2683 
2684   // Now construct the mask that will be used by the vselect or blended
2685   // vrgather operation. For vrgathers, construct the appropriate indices into
2686   // each vector.
2687   for (int MaskIndex : Mask) {
2688     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2689     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2690     if (!IsSelect) {
2691       bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2692       GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2693                                      ? DAG.getConstant(MaskIndex, DL, XLenVT)
2694                                      : DAG.getUNDEF(XLenVT));
2695       GatherIndicesRHS.push_back(
2696           IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2697                             : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2698       if (IsLHSOrUndefIndex && MaskIndex >= 0)
2699         ++LHSIndexCounts[MaskIndex];
2700       if (!IsLHSOrUndefIndex)
2701         ++RHSIndexCounts[MaskIndex - NumElts];
2702     }
2703   }
2704 
2705   if (SwapOps) {
2706     std::swap(V1, V2);
2707     std::swap(GatherIndicesLHS, GatherIndicesRHS);
2708   }
2709 
2710   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2711   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2712   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2713 
2714   if (IsSelect)
2715     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2716 
2717   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2718     // On such a large vector we're unable to use i8 as the index type.
2719     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2720     // may involve vector splitting if we're already at LMUL=8, or our
2721     // user-supplied maximum fixed-length LMUL.
2722     return SDValue();
2723   }
2724 
2725   unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2726   unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2727   MVT IndexVT = VT.changeTypeToInteger();
2728   // Since we can't introduce illegal index types at this stage, use i16 and
2729   // vrgatherei16 if the corresponding index type for plain vrgather is greater
2730   // than XLenVT.
2731   if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2732     GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2733     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2734   }
2735 
2736   MVT IndexContainerVT =
2737       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2738 
2739   SDValue Gather;
2740   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2741   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2742   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2743     Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2744   } else {
2745     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2746     // If only one index is used, we can use a "splat" vrgather.
2747     // TODO: We can splat the most-common index and fix-up any stragglers, if
2748     // that's beneficial.
2749     if (LHSIndexCounts.size() == 1) {
2750       int SplatIndex = LHSIndexCounts.begin()->getFirst();
2751       Gather =
2752           DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2753                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2754     } else {
2755       SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2756       LHSIndices =
2757           convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2758 
2759       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2760                            TrueMask, VL);
2761     }
2762   }
2763 
2764   // If a second vector operand is used by this shuffle, blend it in with an
2765   // additional vrgather.
2766   if (!V2.isUndef()) {
2767     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2768     // If only one index is used, we can use a "splat" vrgather.
2769     // TODO: We can splat the most-common index and fix-up any stragglers, if
2770     // that's beneficial.
2771     if (RHSIndexCounts.size() == 1) {
2772       int SplatIndex = RHSIndexCounts.begin()->getFirst();
2773       V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2774                        DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2775     } else {
2776       SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2777       RHSIndices =
2778           convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2779       V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2780                        VL);
2781     }
2782 
2783     MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2784     SelectMask =
2785         convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2786 
2787     Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2788                          Gather, VL);
2789   }
2790 
2791   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2792 }
2793 
2794 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
2795   // Support splats for any type. These should type legalize well.
2796   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
2797     return true;
2798 
2799   // Only support legal VTs for other shuffles for now.
2800   if (!isTypeLegal(VT))
2801     return false;
2802 
2803   MVT SVT = VT.getSimpleVT();
2804 
2805   bool SwapSources;
2806   return (matchShuffleAsSlideDown(M) >= 0) ||
2807          isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
2808 }
2809 
2810 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2811                                      SDLoc DL, SelectionDAG &DAG,
2812                                      const RISCVSubtarget &Subtarget) {
2813   if (VT.isScalableVector())
2814     return DAG.getFPExtendOrRound(Op, DL, VT);
2815   assert(VT.isFixedLengthVector() &&
2816          "Unexpected value type for RVV FP extend/round lowering");
2817   SDValue Mask, VL;
2818   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2819   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2820                         ? RISCVISD::FP_EXTEND_VL
2821                         : RISCVISD::FP_ROUND_VL;
2822   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2823 }
2824 
2825 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2826 // the exponent.
2827 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2828   MVT VT = Op.getSimpleValueType();
2829   unsigned EltSize = VT.getScalarSizeInBits();
2830   SDValue Src = Op.getOperand(0);
2831   SDLoc DL(Op);
2832 
2833   // We need a FP type that can represent the value.
2834   // TODO: Use f16 for i8 when possible?
2835   MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2836   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2837 
2838   // Legal types should have been checked in the RISCVTargetLowering
2839   // constructor.
2840   // TODO: Splitting may make sense in some cases.
2841   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2842          "Expected legal float type!");
2843 
2844   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2845   // The trailing zero count is equal to log2 of this single bit value.
2846   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2847     SDValue Neg =
2848         DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2849     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
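         // e.g. Src = 0b0110100 yields Src & -Src = 0b0000100; the log2 of that
         // single set bit (2) is the trailing zero count.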
2850   }
2851 
2852   // We have a legal FP type, convert to it.
2853   SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2854   // Bitcast to integer and shift the exponent to the LSB.
2855   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2856   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2857   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2858   SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2859                               DAG.getConstant(ShiftAmt, DL, IntVT));
2860   // Truncate back to original type to allow vnsrl.
2861   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2862   // The exponent contains log2 of the value in biased form.
2863   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
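       // e.g. for a 32-bit element holding 16, uint_to_fp to f64 gives an
       // exponent field of 4 + 1023 = 1027; 1027 - 1023 = 4 trailing zeros, and
       // (1023 + 31) - 1027 = 27 leading zeros.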
2864 
2865   // For trailing zeros, we just need to subtract the bias.
2866   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2867     return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2868                        DAG.getConstant(ExponentBias, DL, VT));
2869 
2870   // For leading zeros, we need to remove the bias and convert from log2 to
2871   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
2872   unsigned Adjust = ExponentBias + (EltSize - 1);
2873   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2874 }
2875 
2876 // While RVV has alignment restrictions, we should always be able to load as a
2877 // legal equivalently-sized byte-typed vector instead. This method is
2878 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2879 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2880 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2881                                                     SelectionDAG &DAG) const {
2882   auto *Load = cast<LoadSDNode>(Op);
2883   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2884 
2885   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2886                                      Load->getMemoryVT(),
2887                                      *Load->getMemOperand()))
2888     return SDValue();
2889 
2890   SDLoc DL(Op);
2891   MVT VT = Op.getSimpleValueType();
2892   unsigned EltSizeBits = VT.getScalarSizeInBits();
2893   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2894          "Unexpected unaligned RVV load type");
2895   MVT NewVT =
2896       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2897   assert(NewVT.isValid() &&
2898          "Expecting equally-sized RVV vector types to be legal");
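       // e.g. an underaligned v4i32 load is re-expressed as a v16i8 load whose
       // result is bitcast back to v4i32.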
2899   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2900                           Load->getPointerInfo(), Load->getOriginalAlign(),
2901                           Load->getMemOperand()->getFlags());
2902   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2903 }
2904 
2905 // While RVV has alignment restrictions, we should always be able to store as a
2906 // legal equivalently-sized byte-typed vector instead. This method is
2907 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2908 // returns SDValue() if the store is already correctly aligned.
2909 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2910                                                      SelectionDAG &DAG) const {
2911   auto *Store = cast<StoreSDNode>(Op);
2912   assert(Store && Store->getValue().getValueType().isVector() &&
2913          "Expected vector store");
2914 
2915   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2916                                      Store->getMemoryVT(),
2917                                      *Store->getMemOperand()))
2918     return SDValue();
2919 
2920   SDLoc DL(Op);
2921   SDValue StoredVal = Store->getValue();
2922   MVT VT = StoredVal.getSimpleValueType();
2923   unsigned EltSizeBits = VT.getScalarSizeInBits();
2924   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2925          "Unexpected unaligned RVV store type");
2926   MVT NewVT =
2927       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2928   assert(NewVT.isValid() &&
2929          "Expecting equally-sized RVV vector types to be legal");
2930   StoredVal = DAG.getBitcast(NewVT, StoredVal);
2931   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2932                       Store->getPointerInfo(), Store->getOriginalAlign(),
2933                       Store->getMemOperand()->getFlags());
2934 }
2935 
2936 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2937                                             SelectionDAG &DAG) const {
2938   switch (Op.getOpcode()) {
2939   default:
2940     report_fatal_error("unimplemented operand");
2941   case ISD::GlobalAddress:
2942     return lowerGlobalAddress(Op, DAG);
2943   case ISD::BlockAddress:
2944     return lowerBlockAddress(Op, DAG);
2945   case ISD::ConstantPool:
2946     return lowerConstantPool(Op, DAG);
2947   case ISD::JumpTable:
2948     return lowerJumpTable(Op, DAG);
2949   case ISD::GlobalTLSAddress:
2950     return lowerGlobalTLSAddress(Op, DAG);
2951   case ISD::SELECT:
2952     return lowerSELECT(Op, DAG);
2953   case ISD::BRCOND:
2954     return lowerBRCOND(Op, DAG);
2955   case ISD::VASTART:
2956     return lowerVASTART(Op, DAG);
2957   case ISD::FRAMEADDR:
2958     return lowerFRAMEADDR(Op, DAG);
2959   case ISD::RETURNADDR:
2960     return lowerRETURNADDR(Op, DAG);
2961   case ISD::SHL_PARTS:
2962     return lowerShiftLeftParts(Op, DAG);
2963   case ISD::SRA_PARTS:
2964     return lowerShiftRightParts(Op, DAG, true);
2965   case ISD::SRL_PARTS:
2966     return lowerShiftRightParts(Op, DAG, false);
2967   case ISD::BITCAST: {
2968     SDLoc DL(Op);
2969     EVT VT = Op.getValueType();
2970     SDValue Op0 = Op.getOperand(0);
2971     EVT Op0VT = Op0.getValueType();
2972     MVT XLenVT = Subtarget.getXLenVT();
2973     if (VT.isFixedLengthVector()) {
2974       // We can handle fixed length vector bitcasts with a simple replacement
2975       // in isel.
2976       if (Op0VT.isFixedLengthVector())
2977         return Op;
2978       // When bitcasting from scalar to fixed-length vector, insert the scalar
2979       // into a one-element vector of the result type, and perform a vector
2980       // bitcast.
2981       if (!Op0VT.isVector()) {
2982         EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2983         if (!isTypeLegal(BVT))
2984           return SDValue();
2985         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2986                                               DAG.getUNDEF(BVT), Op0,
2987                                               DAG.getConstant(0, DL, XLenVT)));
2988       }
2989       return SDValue();
2990     }
2991     // Custom-legalize bitcasts from fixed-length vector types to scalar types
2992     // thus: bitcast the vector to a one-element vector type whose element type
2993     // is the same as the result type, and extract the first element.
2994     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2995       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
2996       if (!isTypeLegal(BVT))
2997         return SDValue();
2998       SDValue BVec = DAG.getBitcast(BVT, Op0);
2999       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
3000                          DAG.getConstant(0, DL, XLenVT));
3001     }
3002     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
3003       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3004       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3005       return FPConv;
3006     }
3007     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3008         Subtarget.hasStdExtF()) {
3009       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3010       SDValue FPConv =
3011           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3012       return FPConv;
3013     }
3014     return SDValue();
3015   }
3016   case ISD::INTRINSIC_WO_CHAIN:
3017     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3018   case ISD::INTRINSIC_W_CHAIN:
3019     return LowerINTRINSIC_W_CHAIN(Op, DAG);
3020   case ISD::INTRINSIC_VOID:
3021     return LowerINTRINSIC_VOID(Op, DAG);
3022   case ISD::BSWAP:
3023   case ISD::BITREVERSE: {
3024     MVT VT = Op.getSimpleValueType();
3025     SDLoc DL(Op);
3026     if (Subtarget.hasStdExtZbp()) {
3027       // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
3028       // Start with the maximum immediate value which is the bitwidth - 1.
3029       unsigned Imm = VT.getSizeInBits() - 1;
3030       // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
3031       if (Op.getOpcode() == ISD::BSWAP)
3032         Imm &= ~0x7U;
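           // e.g. on RV64, an i64 BITREVERSE uses imm 63 while an i64 BSWAP uses
           // imm 56 (rev8).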
3033       return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
3034                          DAG.getConstant(Imm, DL, VT));
3035     }
3036     assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
3037     assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
3038     // Expand bitreverse to a bswap(rev8) followed by brev8.
3039     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
3040     // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
3041     // as brev8 by an isel pattern.
3042     return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
3043                        DAG.getConstant(7, DL, VT));
3044   }
3045   case ISD::FSHL:
3046   case ISD::FSHR: {
3047     MVT VT = Op.getSimpleValueType();
3048     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
3049     SDLoc DL(Op);
3050     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
3051     // use log2(XLen) bits. Mask the shift amount accordingly to prevent
3052     // accidentally setting the extra bit.
3053     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
3054     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
3055                                 DAG.getConstant(ShAmtWidth, DL, VT));
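         // e.g. on RV64 this keeps only the low 6 bits of the shift amount
         // (values 0-63), leaving the extra FSL/FSR bit clear.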
3056     // fshl and fshr concatenate their operands in the same order. fsr and fsl
3057     // instructions use different orders. fshl will return its first operand for
3058     // shift of zero, fshr will return its second operand. fsl and fsr both
3059     // return rs1 so the ISD nodes need to have different operand orders.
3060     // Shift amount is in rs2.
3061     SDValue Op0 = Op.getOperand(0);
3062     SDValue Op1 = Op.getOperand(1);
3063     unsigned Opc = RISCVISD::FSL;
3064     if (Op.getOpcode() == ISD::FSHR) {
3065       std::swap(Op0, Op1);
3066       Opc = RISCVISD::FSR;
3067     }
3068     return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
3069   }
3070   case ISD::TRUNCATE: {
3071     SDLoc DL(Op);
3072     MVT VT = Op.getSimpleValueType();
3073     // Only custom-lower vector truncates
3074     if (!VT.isVector())
3075       return Op;
3076 
3077     // Truncates to mask types are handled differently
3078     if (VT.getVectorElementType() == MVT::i1)
3079       return lowerVectorMaskTrunc(Op, DAG);
3080 
3081     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
3082     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
3083     // truncate by one power of two at a time.
3084     MVT DstEltVT = VT.getVectorElementType();
3085 
3086     SDValue Src = Op.getOperand(0);
3087     MVT SrcVT = Src.getSimpleValueType();
3088     MVT SrcEltVT = SrcVT.getVectorElementType();
3089 
3090     assert(DstEltVT.bitsLT(SrcEltVT) &&
3091            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
3092            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
3093            "Unexpected vector truncate lowering");
3094 
3095     MVT ContainerVT = SrcVT;
3096     if (SrcVT.isFixedLengthVector()) {
3097       ContainerVT = getContainerForFixedLengthVector(SrcVT);
3098       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3099     }
3100 
3101     SDValue Result = Src;
3102     SDValue Mask, VL;
3103     std::tie(Mask, VL) =
3104         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3105     LLVMContext &Context = *DAG.getContext();
3106     const ElementCount Count = ContainerVT.getVectorElementCount();
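         // e.g. a v8i64 -> v8i8 truncate is emitted as three
         // RISCVISD::TRUNCATE_VECTOR_VL steps: i64 -> i32 -> i16 -> i8.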
3107     do {
3108       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3109       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
3110       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
3111                            Mask, VL);
3112     } while (SrcEltVT != DstEltVT);
3113 
3114     if (SrcVT.isFixedLengthVector())
3115       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3116 
3117     return Result;
3118   }
3119   case ISD::ANY_EXTEND:
3120   case ISD::ZERO_EXTEND:
3121     if (Op.getOperand(0).getValueType().isVector() &&
3122         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3123       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3124     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3125   case ISD::SIGN_EXTEND:
3126     if (Op.getOperand(0).getValueType().isVector() &&
3127         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3128       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3129     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3130   case ISD::SPLAT_VECTOR_PARTS:
3131     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3132   case ISD::INSERT_VECTOR_ELT:
3133     return lowerINSERT_VECTOR_ELT(Op, DAG);
3134   case ISD::EXTRACT_VECTOR_ELT:
3135     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3136   case ISD::VSCALE: {
3137     MVT VT = Op.getSimpleValueType();
3138     SDLoc DL(Op);
3139     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3140     // We define our scalable vector types for lmul=1 to use a 64 bit known
3141     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3142     // vscale as VLENB / 8.
3143     static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3144     if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
3145       report_fatal_error("Support for VLEN==32 is incomplete.");
3146     if (isa<ConstantSDNode>(Op.getOperand(0))) {
3147       // We assume VLENB is a multiple of 8. We manually choose the best shift
3148       // here because SimplifyDemandedBits isn't always able to simplify it.
3149       uint64_t Val = Op.getConstantOperandVal(0);
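           // e.g. vscale x 4 lowers to VLENB >> 1 and vscale x 16 to VLENB << 1.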
3150       if (isPowerOf2_64(Val)) {
3151         uint64_t Log2 = Log2_64(Val);
3152         if (Log2 < 3)
3153           return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3154                              DAG.getConstant(3 - Log2, DL, VT));
3155         if (Log2 > 3)
3156           return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3157                              DAG.getConstant(Log2 - 3, DL, VT));
3158         return VLENB;
3159       }
3160       // If the multiplier is a multiple of 8, scale it down to avoid needing
3161       // to shift the VLENB value.
3162       if ((Val % 8) == 0)
3163         return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3164                            DAG.getConstant(Val / 8, DL, VT));
3165     }
3166 
3167     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3168                                  DAG.getConstant(3, DL, VT));
3169     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3170   }
3171   case ISD::FPOWI: {
3172     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3173     // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3174     if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3175         Op.getOperand(1).getValueType() == MVT::i32) {
3176       SDLoc DL(Op);
3177       SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3178       SDValue Powi =
3179           DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3180       return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3181                          DAG.getIntPtrConstant(0, DL));
3182     }
3183     return SDValue();
3184   }
3185   case ISD::FP_EXTEND: {
3186     // RVV can only do fp_extend to types double the size of the source. We
3187     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
3188     // via f32.
3189     SDLoc DL(Op);
3190     MVT VT = Op.getSimpleValueType();
3191     SDValue Src = Op.getOperand(0);
3192     MVT SrcVT = Src.getSimpleValueType();
3193 
3194     // Prepare any fixed-length vector operands.
3195     MVT ContainerVT = VT;
3196     if (SrcVT.isFixedLengthVector()) {
3197       ContainerVT = getContainerForFixedLengthVector(VT);
3198       MVT SrcContainerVT =
3199           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
3200       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3201     }
3202 
3203     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
3204         SrcVT.getVectorElementType() != MVT::f16) {
3205       // For scalable vectors, we only need to close the gap between
3206       // vXf16->vXf64.
3207       if (!VT.isFixedLengthVector())
3208         return Op;
3209       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
3210       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3211       return convertFromScalableVector(VT, Src, DAG, Subtarget);
3212     }
3213 
3214     MVT InterVT = VT.changeVectorElementType(MVT::f32);
3215     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
3216     SDValue IntermediateExtend = getRVVFPExtendOrRound(
3217         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
3218 
3219     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
3220                                            DL, DAG, Subtarget);
3221     if (VT.isFixedLengthVector())
3222       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
3223     return Extend;
3224   }
3225   case ISD::FP_ROUND: {
3226     // RVV can only do fp_round to types half the size of the source. We
3227     // custom-lower f64->f16 rounds via RVV's round-to-odd float
3228     // conversion instruction.
3229     SDLoc DL(Op);
3230     MVT VT = Op.getSimpleValueType();
3231     SDValue Src = Op.getOperand(0);
3232     MVT SrcVT = Src.getSimpleValueType();
3233 
3234     // Prepare any fixed-length vector operands.
3235     MVT ContainerVT = VT;
3236     if (VT.isFixedLengthVector()) {
3237       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3238       ContainerVT =
3239           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3240       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3241     }
3242 
3243     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
3244         SrcVT.getVectorElementType() != MVT::f64) {
3245       // For scalable vectors, we only need to close the gap between
3246       // vXf64<->vXf16.
3247       if (!VT.isFixedLengthVector())
3248         return Op;
3249       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
3250       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3251       return convertFromScalableVector(VT, Src, DAG, Subtarget);
3252     }
3253 
3254     SDValue Mask, VL;
3255     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3256 
3257     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
3258     SDValue IntermediateRound =
3259         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
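         // Rounding to odd in the first narrowing step avoids double rounding
         // when the intermediate f32 result is subsequently rounded to f16.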
3260     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
3261                                           DL, DAG, Subtarget);
3262 
3263     if (VT.isFixedLengthVector())
3264       return convertFromScalableVector(VT, Round, DAG, Subtarget);
3265     return Round;
3266   }
3267   case ISD::FP_TO_SINT:
3268   case ISD::FP_TO_UINT:
3269   case ISD::SINT_TO_FP:
3270   case ISD::UINT_TO_FP: {
3271     // RVV can only do fp<->int conversions to types half/double the size of
3272     // the source. We custom-lower any conversions that do two hops into
3273     // sequences.
3274     MVT VT = Op.getSimpleValueType();
3275     if (!VT.isVector())
3276       return Op;
3277     SDLoc DL(Op);
3278     SDValue Src = Op.getOperand(0);
3279     MVT EltVT = VT.getVectorElementType();
3280     MVT SrcVT = Src.getSimpleValueType();
3281     MVT SrcEltVT = SrcVT.getVectorElementType();
3282     unsigned EltSize = EltVT.getSizeInBits();
3283     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3284     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3285            "Unexpected vector element types");
3286 
3287     bool IsInt2FP = SrcEltVT.isInteger();
3288     // Widening conversions
3289     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
3290       if (IsInt2FP) {
3291         // Do a regular integer sign/zero extension then convert to float.
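             // e.g. v4i8 -> v4f32 becomes v4i8 -> v4i32 (extend) -> v4f32.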
3292         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
3293                                       VT.getVectorElementCount());
3294         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3295                                  ? ISD::ZERO_EXTEND
3296                                  : ISD::SIGN_EXTEND;
3297         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3298         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3299       }
3300       // FP2Int
3301       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3302       // Do one doubling fp_extend then complete the operation by converting
3303       // to int.
3304       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3305       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3306       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3307     }
3308 
3309     // Narrowing conversions
3310     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
3311       if (IsInt2FP) {
3312         // One narrowing int_to_fp, then an fp_round.
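             // e.g. v4i64 -> v4f16 becomes v4i64 -> v4f32 -> v4f16.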
3313         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3314         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3315         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3316         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3317       }
3318       // FP2Int
3319       // One narrowing fp_to_int, then truncate the integer. If the float isn't
3320       // representable by the integer, the result is poison.
3321       MVT IVecVT =
3322           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
3323                            VT.getVectorElementCount());
3324       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3325       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3326     }
3327 
3328     // Scalable vectors can exit here. Patterns will handle equally-sized
3329     // conversions as well as the halving/doubling ones.
3330     if (!VT.isFixedLengthVector())
3331       return Op;
3332 
3333     // For fixed-length vectors we lower to a custom "VL" node.
3334     unsigned RVVOpc = 0;
3335     switch (Op.getOpcode()) {
3336     default:
3337       llvm_unreachable("Impossible opcode");
3338     case ISD::FP_TO_SINT:
3339       RVVOpc = RISCVISD::FP_TO_SINT_VL;
3340       break;
3341     case ISD::FP_TO_UINT:
3342       RVVOpc = RISCVISD::FP_TO_UINT_VL;
3343       break;
3344     case ISD::SINT_TO_FP:
3345       RVVOpc = RISCVISD::SINT_TO_FP_VL;
3346       break;
3347     case ISD::UINT_TO_FP:
3348       RVVOpc = RISCVISD::UINT_TO_FP_VL;
3349       break;
3350     }
3351 
3352     MVT ContainerVT, SrcContainerVT;
3353     // Derive the reference container type from the larger vector type.
3354     if (SrcEltSize > EltSize) {
3355       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3356       ContainerVT =
3357           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3358     } else {
3359       ContainerVT = getContainerForFixedLengthVector(VT);
3360       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3361     }
3362 
3363     SDValue Mask, VL;
3364     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3365 
3366     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3367     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3368     return convertFromScalableVector(VT, Src, DAG, Subtarget);
3369   }
3370   case ISD::FP_TO_SINT_SAT:
3371   case ISD::FP_TO_UINT_SAT:
3372     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3373   case ISD::FTRUNC:
3374   case ISD::FCEIL:
3375   case ISD::FFLOOR:
3376     return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
3377   case ISD::FROUND:
3378     return lowerFROUND(Op, DAG);
3379   case ISD::VECREDUCE_ADD:
3380   case ISD::VECREDUCE_UMAX:
3381   case ISD::VECREDUCE_SMAX:
3382   case ISD::VECREDUCE_UMIN:
3383   case ISD::VECREDUCE_SMIN:
3384     return lowerVECREDUCE(Op, DAG);
3385   case ISD::VECREDUCE_AND:
3386   case ISD::VECREDUCE_OR:
3387   case ISD::VECREDUCE_XOR:
3388     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3389       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3390     return lowerVECREDUCE(Op, DAG);
3391   case ISD::VECREDUCE_FADD:
3392   case ISD::VECREDUCE_SEQ_FADD:
3393   case ISD::VECREDUCE_FMIN:
3394   case ISD::VECREDUCE_FMAX:
3395     return lowerFPVECREDUCE(Op, DAG);
3396   case ISD::VP_REDUCE_ADD:
3397   case ISD::VP_REDUCE_UMAX:
3398   case ISD::VP_REDUCE_SMAX:
3399   case ISD::VP_REDUCE_UMIN:
3400   case ISD::VP_REDUCE_SMIN:
3401   case ISD::VP_REDUCE_FADD:
3402   case ISD::VP_REDUCE_SEQ_FADD:
3403   case ISD::VP_REDUCE_FMIN:
3404   case ISD::VP_REDUCE_FMAX:
3405     return lowerVPREDUCE(Op, DAG);
3406   case ISD::VP_REDUCE_AND:
3407   case ISD::VP_REDUCE_OR:
3408   case ISD::VP_REDUCE_XOR:
3409     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3410       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3411     return lowerVPREDUCE(Op, DAG);
3412   case ISD::INSERT_SUBVECTOR:
3413     return lowerINSERT_SUBVECTOR(Op, DAG);
3414   case ISD::EXTRACT_SUBVECTOR:
3415     return lowerEXTRACT_SUBVECTOR(Op, DAG);
3416   case ISD::STEP_VECTOR:
3417     return lowerSTEP_VECTOR(Op, DAG);
3418   case ISD::VECTOR_REVERSE:
3419     return lowerVECTOR_REVERSE(Op, DAG);
3420   case ISD::BUILD_VECTOR:
3421     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3422   case ISD::SPLAT_VECTOR:
3423     if (Op.getValueType().getVectorElementType() == MVT::i1)
3424       return lowerVectorMaskSplat(Op, DAG);
3425     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
3426   case ISD::VECTOR_SHUFFLE:
3427     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3428   case ISD::CONCAT_VECTORS: {
3429     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3430     // better than going through the stack, as the default expansion does.
3431     SDLoc DL(Op);
3432     MVT VT = Op.getSimpleValueType();
3433     unsigned NumOpElts =
3434         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
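         // e.g. concatenating four v2i32 operands emits INSERT_SUBVECTORs at
         // element indices 0, 2, 4 and 6 into a v8i32 undef (skipping any undef
         // operands).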
3435     SDValue Vec = DAG.getUNDEF(VT);
3436     for (const auto &OpIdx : enumerate(Op->ops())) {
3437       SDValue SubVec = OpIdx.value();
3438       // Don't insert undef subvectors.
3439       if (SubVec.isUndef())
3440         continue;
3441       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3442                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3443     }
3444     return Vec;
3445   }
3446   case ISD::LOAD:
3447     if (auto V = expandUnalignedRVVLoad(Op, DAG))
3448       return V;
3449     if (Op.getValueType().isFixedLengthVector())
3450       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3451     return Op;
3452   case ISD::STORE:
3453     if (auto V = expandUnalignedRVVStore(Op, DAG))
3454       return V;
3455     if (Op.getOperand(1).getValueType().isFixedLengthVector())
3456       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3457     return Op;
3458   case ISD::MLOAD:
3459   case ISD::VP_LOAD:
3460     return lowerMaskedLoad(Op, DAG);
3461   case ISD::MSTORE:
3462   case ISD::VP_STORE:
3463     return lowerMaskedStore(Op, DAG);
3464   case ISD::SETCC:
3465     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3466   case ISD::ADD:
3467     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
3468   case ISD::SUB:
3469     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
3470   case ISD::MUL:
3471     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
3472   case ISD::MULHS:
3473     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
3474   case ISD::MULHU:
3475     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
3476   case ISD::AND:
3477     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3478                                               RISCVISD::AND_VL);
3479   case ISD::OR:
3480     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3481                                               RISCVISD::OR_VL);
3482   case ISD::XOR:
3483     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3484                                               RISCVISD::XOR_VL);
3485   case ISD::SDIV:
3486     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
3487   case ISD::SREM:
3488     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
3489   case ISD::UDIV:
3490     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
3491   case ISD::UREM:
3492     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
3493   case ISD::SHL:
3494   case ISD::SRA:
3495   case ISD::SRL:
3496     if (Op.getSimpleValueType().isFixedLengthVector())
3497       return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3498     // This can be called for an i32 shift amount that needs to be promoted.
3499     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3500            "Unexpected custom legalisation");
3501     return SDValue();
3502   case ISD::SADDSAT:
3503     return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
3504   case ISD::UADDSAT:
3505     return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
3506   case ISD::SSUBSAT:
3507     return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
3508   case ISD::USUBSAT:
3509     return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
3510   case ISD::FADD:
3511     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
3512   case ISD::FSUB:
3513     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
3514   case ISD::FMUL:
3515     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
3516   case ISD::FDIV:
3517     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
3518   case ISD::FNEG:
3519     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3520   case ISD::FABS:
3521     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3522   case ISD::FSQRT:
3523     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3524   case ISD::FMA:
3525     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
3526   case ISD::SMIN:
3527     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
3528   case ISD::SMAX:
3529     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
3530   case ISD::UMIN:
3531     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
3532   case ISD::UMAX:
3533     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
3534   case ISD::FMINNUM:
3535     return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
3536   case ISD::FMAXNUM:
3537     return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
3538   case ISD::ABS:
3539     return lowerABS(Op, DAG);
3540   case ISD::CTLZ_ZERO_UNDEF:
3541   case ISD::CTTZ_ZERO_UNDEF:
3542     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3543   case ISD::VSELECT:
3544     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3545   case ISD::FCOPYSIGN:
3546     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3547   case ISD::MGATHER:
3548   case ISD::VP_GATHER:
3549     return lowerMaskedGather(Op, DAG);
3550   case ISD::MSCATTER:
3551   case ISD::VP_SCATTER:
3552     return lowerMaskedScatter(Op, DAG);
3553   case ISD::FLT_ROUNDS_:
3554     return lowerGET_ROUNDING(Op, DAG);
3555   case ISD::SET_ROUNDING:
3556     return lowerSET_ROUNDING(Op, DAG);
3557   case ISD::VP_SELECT:
3558     return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3559   case ISD::VP_MERGE:
3560     return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
3561   case ISD::VP_ADD:
3562     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
3563   case ISD::VP_SUB:
3564     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
3565   case ISD::VP_MUL:
3566     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
3567   case ISD::VP_SDIV:
3568     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
3569   case ISD::VP_UDIV:
3570     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
3571   case ISD::VP_SREM:
3572     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
3573   case ISD::VP_UREM:
3574     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
3575   case ISD::VP_AND:
3576     return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3577   case ISD::VP_OR:
3578     return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3579   case ISD::VP_XOR:
3580     return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3581   case ISD::VP_ASHR:
3582     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
3583   case ISD::VP_LSHR:
3584     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
3585   case ISD::VP_SHL:
3586     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
3587   case ISD::VP_FADD:
3588     return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
3589   case ISD::VP_FSUB:
3590     return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
3591   case ISD::VP_FMUL:
3592     return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
3593   case ISD::VP_FDIV:
3594     return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
3595   }
3596 }
3597 
3598 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3599                              SelectionDAG &DAG, unsigned Flags) {
3600   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3601 }
3602 
3603 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3604                              SelectionDAG &DAG, unsigned Flags) {
3605   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3606                                    Flags);
3607 }
3608 
3609 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3610                              SelectionDAG &DAG, unsigned Flags) {
3611   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3612                                    N->getOffset(), Flags);
3613 }
3614 
3615 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3616                              SelectionDAG &DAG, unsigned Flags) {
3617   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3618 }
3619 
3620 template <class NodeTy>
3621 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3622                                      bool IsLocal) const {
3623   SDLoc DL(N);
3624   EVT Ty = getPointerTy(DAG.getDataLayout());
3625 
3626   if (isPositionIndependent()) {
3627     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3628     if (IsLocal)
3629       // Use PC-relative addressing to access the symbol. This generates the
3630       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3631       // %pcrel_lo(auipc)).
3632       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3633 
3634     // Use PC-relative addressing to access the GOT for this symbol, then load
3635     // the address from the GOT. This generates the pattern (PseudoLA sym),
3636     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3637     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
3638   }
3639 
3640   switch (getTargetMachine().getCodeModel()) {
3641   default:
3642     report_fatal_error("Unsupported code model for lowering");
3643   case CodeModel::Small: {
3644     // Generate a sequence for accessing addresses within the first 2 GiB of
3645     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
3646     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3647     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3648     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3649     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
3650   }
3651   case CodeModel::Medium: {
3652     // Generate a sequence for accessing addresses within any 2GiB range within
3653     // the address space. This generates the pattern (PseudoLLA sym), which
3654     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3655     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3656     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3657   }
3658   }
3659 }
3660 
3661 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3662                                                 SelectionDAG &DAG) const {
3663   SDLoc DL(Op);
3664   EVT Ty = Op.getValueType();
3665   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3666   int64_t Offset = N->getOffset();
3667   MVT XLenVT = Subtarget.getXLenVT();
3668 
3669   const GlobalValue *GV = N->getGlobal();
3670   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3671   SDValue Addr = getAddr(N, DAG, IsLocal);
3672 
3673   // In order to maximise the opportunity for common subexpression elimination,
3674   // emit a separate ADD node for the global address offset instead of folding
3675   // it in the global address node. Later peephole optimisations may choose to
3676   // fold it back in when profitable.
3677   if (Offset != 0)
3678     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3679                        DAG.getConstant(Offset, DL, XLenVT));
3680   return Addr;
3681 }
3682 
3683 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3684                                                SelectionDAG &DAG) const {
3685   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3686 
3687   return getAddr(N, DAG);
3688 }
3689 
3690 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3691                                                SelectionDAG &DAG) const {
3692   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3693 
3694   return getAddr(N, DAG);
3695 }
3696 
3697 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3698                                             SelectionDAG &DAG) const {
3699   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3700 
3701   return getAddr(N, DAG);
3702 }
3703 
3704 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3705                                               SelectionDAG &DAG,
3706                                               bool UseGOT) const {
3707   SDLoc DL(N);
3708   EVT Ty = getPointerTy(DAG.getDataLayout());
3709   const GlobalValue *GV = N->getGlobal();
3710   MVT XLenVT = Subtarget.getXLenVT();
3711 
3712   if (UseGOT) {
3713     // Use PC-relative addressing to access the GOT for this TLS symbol, then
3714     // load the address from the GOT and add the thread pointer. This generates
3715     // the pattern (PseudoLA_TLS_IE sym), which expands to
3716     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3717     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3718     SDValue Load =
3719         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3720 
3721     // Add the thread pointer.
3722     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3723     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3724   }
3725 
3726   // Generate a sequence for accessing the address relative to the thread
3727   // pointer, with the appropriate adjustment for the thread pointer offset.
3728   // This generates the pattern
3729   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3730   SDValue AddrHi =
3731       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3732   SDValue AddrAdd =
3733       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3734   SDValue AddrLo =
3735       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3736 
3737   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3738   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3739   SDValue MNAdd = SDValue(
3740       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3741       0);
3742   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3743 }
3744 
3745 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3746                                                SelectionDAG &DAG) const {
3747   SDLoc DL(N);
3748   EVT Ty = getPointerTy(DAG.getDataLayout());
3749   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3750   const GlobalValue *GV = N->getGlobal();
3751 
3752   // Use a PC-relative addressing mode to access the global dynamic GOT address.
3753   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3754   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3755   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3756   SDValue Load =
3757       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3758 
3759   // Prepare argument list to generate call.
3760   ArgListTy Args;
3761   ArgListEntry Entry;
3762   Entry.Node = Load;
3763   Entry.Ty = CallTy;
3764   Args.push_back(Entry);
3765 
3766   // Setup call to __tls_get_addr.
3767   TargetLowering::CallLoweringInfo CLI(DAG);
3768   CLI.setDebugLoc(DL)
3769       .setChain(DAG.getEntryNode())
3770       .setLibCallee(CallingConv::C, CallTy,
3771                     DAG.getExternalSymbol("__tls_get_addr", Ty),
3772                     std::move(Args));
3773 
3774   return LowerCallTo(CLI).first;
3775 }
3776 
3777 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3778                                                    SelectionDAG &DAG) const {
3779   SDLoc DL(Op);
3780   EVT Ty = Op.getValueType();
3781   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3782   int64_t Offset = N->getOffset();
3783   MVT XLenVT = Subtarget.getXLenVT();
3784 
3785   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3786 
3787   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3788       CallingConv::GHC)
3789     report_fatal_error("In GHC calling convention TLS is not supported");
3790 
3791   SDValue Addr;
3792   switch (Model) {
3793   case TLSModel::LocalExec:
3794     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3795     break;
3796   case TLSModel::InitialExec:
3797     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3798     break;
3799   case TLSModel::LocalDynamic:
3800   case TLSModel::GeneralDynamic:
3801     Addr = getDynamicTLSAddr(N, DAG);
3802     break;
3803   }
3804 
3805   // In order to maximise the opportunity for common subexpression elimination,
3806   // emit a separate ADD node for the global address offset instead of folding
3807   // it in the global address node. Later peephole optimisations may choose to
3808   // fold it back in when profitable.
3809   if (Offset != 0)
3810     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3811                        DAG.getConstant(Offset, DL, XLenVT));
3812   return Addr;
3813 }
3814 
3815 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3816   SDValue CondV = Op.getOperand(0);
3817   SDValue TrueV = Op.getOperand(1);
3818   SDValue FalseV = Op.getOperand(2);
3819   SDLoc DL(Op);
3820   MVT VT = Op.getSimpleValueType();
3821   MVT XLenVT = Subtarget.getXLenVT();
3822 
3823   // Lower vector SELECTs to VSELECTs by splatting the condition.
3824   if (VT.isVector()) {
3825     MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3826     SDValue CondSplat = VT.isScalableVector()
3827                             ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3828                             : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3829     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3830   }
3831 
3832   // If the result type is XLenVT and CondV is the output of a SETCC node
3833   // which also operated on XLenVT inputs, then merge the SETCC node into the
3834   // lowered RISCVISD::SELECT_CC to take advantage of the integer
3835   // compare+branch instructions. i.e.:
3836   // (select (setcc lhs, rhs, cc), truev, falsev)
3837   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3838   if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3839       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3840     SDValue LHS = CondV.getOperand(0);
3841     SDValue RHS = CondV.getOperand(1);
3842     const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3843     ISD::CondCode CCVal = CC->get();
3844 
    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to the
    // SETLT case for now because that is what signed saturating add/sub need.
3849     // FIXME: We don't need the condition to be SETLT or even a SETCC,
3850     // but we would probably want to swap the true/false values if the condition
3851     // is SETGE/SETLE to avoid an XORI.
3852     if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3853         CCVal == ISD::SETLT) {
3854       const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3855       const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
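      // (select (setlt lhs, rhs), X+1, X) -> (add (setlt lhs, rhs), X)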
3856       if (TrueVal - 1 == FalseVal)
3857         return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
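      // (select (setlt lhs, rhs), X, X+1) -> (sub X+1, (setlt lhs, rhs))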
3858       if (TrueVal + 1 == FalseVal)
3859         return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3860     }
3861 
3862     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3863 
3864     SDValue TargetCC = DAG.getCondCode(CCVal);
3865     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3866     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3867   }
3868 
3869   // Otherwise:
3870   // (select condv, truev, falsev)
3871   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3872   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3873   SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3874 
3875   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3876 
3877   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3878 }
3879 
3880 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3881   SDValue CondV = Op.getOperand(1);
3882   SDLoc DL(Op);
3883   MVT XLenVT = Subtarget.getXLenVT();
3884 
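  // If the condition is a setcc on XLenVT operands, fold it directly into the
  // BR_CC node; otherwise compare the condition against zero.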
3885   if (CondV.getOpcode() == ISD::SETCC &&
3886       CondV.getOperand(0).getValueType() == XLenVT) {
3887     SDValue LHS = CondV.getOperand(0);
3888     SDValue RHS = CondV.getOperand(1);
3889     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3890 
3891     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3892 
3893     SDValue TargetCC = DAG.getCondCode(CCVal);
3894     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3895                        LHS, RHS, TargetCC, Op.getOperand(2));
3896   }
3897 
3898   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3899                      CondV, DAG.getConstant(0, DL, XLenVT),
3900                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3901 }
3902 
3903 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3904   MachineFunction &MF = DAG.getMachineFunction();
3905   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3906 
3907   SDLoc DL(Op);
3908   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3909                                  getPointerTy(MF.getDataLayout()));
3910 
3911   // vastart just stores the address of the VarArgsFrameIndex slot into the
3912   // memory location argument.
3913   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3914   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3915                       MachinePointerInfo(SV));
3916 }
3917 
3918 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3919                                             SelectionDAG &DAG) const {
3920   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3921   MachineFunction &MF = DAG.getMachineFunction();
3922   MachineFrameInfo &MFI = MF.getFrameInfo();
3923   MFI.setFrameAddressIsTaken(true);
3924   Register FrameReg = RI.getFrameRegister(MF);
3925   int XLenInBytes = Subtarget.getXLen() / 8;
3926 
3927   EVT VT = Op.getValueType();
3928   SDLoc DL(Op);
3929   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3930   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3931   while (Depth--) {
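    // The caller's frame pointer is assumed to be spilled 2*XLEN bytes below
    // the current frame pointer, just beneath the saved return address.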
3932     int Offset = -(XLenInBytes * 2);
3933     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3934                               DAG.getIntPtrConstant(Offset, DL));
3935     FrameAddr =
3936         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3937   }
3938   return FrameAddr;
3939 }
3940 
3941 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3942                                              SelectionDAG &DAG) const {
3943   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3944   MachineFunction &MF = DAG.getMachineFunction();
3945   MachineFrameInfo &MFI = MF.getFrameInfo();
3946   MFI.setReturnAddressIsTaken(true);
3947   MVT XLenVT = Subtarget.getXLenVT();
3948   int XLenInBytes = Subtarget.getXLen() / 8;
3949 
3950   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3951     return SDValue();
3952 
3953   EVT VT = Op.getValueType();
3954   SDLoc DL(Op);
3955   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3956   if (Depth) {
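    // Load the return address spilled in the Depth-th parent frame, assuming
    // it sits XLEN bytes below that frame's frame pointer.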
3957     int Off = -XLenInBytes;
3958     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3959     SDValue Offset = DAG.getConstant(Off, DL, VT);
3960     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3961                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3962                        MachinePointerInfo());
3963   }
3964 
3965   // Return the value of the return address register, marking it an implicit
3966   // live-in.
3967   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3968   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3969 }
3970 
3971 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3972                                                  SelectionDAG &DAG) const {
3973   SDLoc DL(Op);
3974   SDValue Lo = Op.getOperand(0);
3975   SDValue Hi = Op.getOperand(1);
3976   SDValue Shamt = Op.getOperand(2);
3977   EVT VT = Lo.getValueType();
3978 
3979   // if Shamt-XLEN < 0: // Shamt < XLEN
3980   //   Lo = Lo << Shamt
3981   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3982   // else:
3983   //   Lo = 0
3984   //   Hi = Lo << (Shamt-XLEN)
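  // The (Lo >>u 1) >>u (XLEN-1 - Shamt) form is used instead of
  // Lo >>u (XLEN - Shamt) so that a shift amount of zero does not create an
  // out-of-range shift by XLEN.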
3985 
3986   SDValue Zero = DAG.getConstant(0, DL, VT);
3987   SDValue One = DAG.getConstant(1, DL, VT);
3988   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3989   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3990   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3991   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3992 
3993   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3994   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3995   SDValue ShiftRightLo =
3996       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3997   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3998   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3999   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
4000 
4001   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
4002 
4003   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4004   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4005 
4006   SDValue Parts[2] = {Lo, Hi};
4007   return DAG.getMergeValues(Parts, DL);
4008 }
4009 
4010 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
4011                                                   bool IsSRA) const {
4012   SDLoc DL(Op);
4013   SDValue Lo = Op.getOperand(0);
4014   SDValue Hi = Op.getOperand(1);
4015   SDValue Shamt = Op.getOperand(2);
4016   EVT VT = Lo.getValueType();
4017 
4018   // SRA expansion:
4019   //   if Shamt-XLEN < 0: // Shamt < XLEN
4020   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
4021   //     Hi = Hi >>s Shamt
4022   //   else:
4023   //     Lo = Hi >>s (Shamt-XLEN);
4024   //     Hi = Hi >>s (XLEN-1)
4025   //
4026   // SRL expansion:
4027   //   if Shamt-XLEN < 0: // Shamt < XLEN
4028   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
4029   //     Hi = Hi >>u Shamt
4030   //   else:
4031   //     Lo = Hi >>u (Shamt-XLEN);
4032   //     Hi = 0;
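  // The (Hi << 1) << (XLEN-1 - Shamt) form avoids an out-of-range shift by
  // XLEN when Shamt is zero.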
4033 
4034   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4035 
4036   SDValue Zero = DAG.getConstant(0, DL, VT);
4037   SDValue One = DAG.getConstant(1, DL, VT);
4038   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
4039   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
4040   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
4041   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
4042 
4043   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4044   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4045   SDValue ShiftLeftHi =
4046       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
4047   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4048   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4049   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
4050   SDValue HiFalse =
4051       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
4052 
4053   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
4054 
4055   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4056   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4057 
4058   SDValue Parts[2] = {Lo, Hi};
4059   return DAG.getMergeValues(Parts, DL);
4060 }
4061 
4062 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
4063 // legal equivalently-sized i8 type, so we can use that as a go-between.
4064 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
4065                                                   SelectionDAG &DAG) const {
4066   SDLoc DL(Op);
4067   MVT VT = Op.getSimpleValueType();
4068   SDValue SplatVal = Op.getOperand(0);
4069   // All-zeros or all-ones splats are handled specially.
4070   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
4071     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4072     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
4073   }
4074   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
4075     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4076     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
4077   }
4078   MVT XLenVT = Subtarget.getXLenVT();
4079   assert(SplatVal.getValueType() == XLenVT &&
4080          "Unexpected type for i1 splat value");
4081   MVT InterVT = VT.changeVectorElementType(MVT::i8);
4082   SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
4083                          DAG.getConstant(1, DL, XLenVT));
4084   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
4085   SDValue Zero = DAG.getConstant(0, DL, InterVT);
4086   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
4087 }
4088 
4089 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
4090 // illegal (currently only vXi64 RV32).
4091 // FIXME: We could also catch non-constant sign-extended i32 values and lower
4092 // them to VMV_V_X_VL.
4093 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
4094                                                      SelectionDAG &DAG) const {
4095   SDLoc DL(Op);
4096   MVT VecVT = Op.getSimpleValueType();
4097   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
4098          "Unexpected SPLAT_VECTOR_PARTS lowering");
4099 
4100   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
4101   SDValue Lo = Op.getOperand(0);
4102   SDValue Hi = Op.getOperand(1);
4103 
4104   if (VecVT.isFixedLengthVector()) {
4105     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4106     SDLoc DL(Op);
4107     SDValue Mask, VL;
4108     std::tie(Mask, VL) =
4109         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4110 
4111     SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
4112     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
4113   }
4114 
4115   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4116     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4117     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If the Hi constant is just the sign-extension of Lo (every bit equal to
    // Lo's sign bit), lower this as a custom node in order to try and match
    // RVV vector/scalar instructions.
4120     if ((LoC >> 31) == HiC)
4121       return DAG.getNode(
4122           RISCVISD::VMV_V_X_VL, DL, VecVT, Lo,
4123           DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i32));
4124   }
4125 
4126   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4127   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4128       isa<ConstantSDNode>(Hi.getOperand(1)) &&
4129       Hi.getConstantOperandVal(1) == 31)
4130     return DAG.getNode(
4131         RISCVISD::VMV_V_X_VL, DL, VecVT, Lo,
4132         DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i32));
4133 
  // Fall back to using a stack store and a stride-x0 vector load. Use X0 as VL.
4135   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
4136                      DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i32));
4137 }
4138 
4139 // Custom-lower extensions from mask vectors by using a vselect either with 1
4140 // for zero/any-extension or -1 for sign-extension:
4141 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
4142 // Note that any-extension is lowered identically to zero-extension.
4143 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
4144                                                 int64_t ExtTrueVal) const {
4145   SDLoc DL(Op);
4146   MVT VecVT = Op.getSimpleValueType();
4147   SDValue Src = Op.getOperand(0);
4148   // Only custom-lower extensions from mask types
4149   assert(Src.getValueType().isVector() &&
4150          Src.getValueType().getVectorElementType() == MVT::i1);
4151 
4152   MVT XLenVT = Subtarget.getXLenVT();
4153   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
4154   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
4155 
4156   if (VecVT.isScalableVector()) {
4157     // Be careful not to introduce illegal scalar types at this stage, and be
4158     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
4159     // illegal and must be expanded. Since we know that the constants are
4160     // sign-extended 32-bit values, we use VMV_V_X_VL directly.
4161     bool IsRV32E64 =
4162         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
4163 
4164     if (!IsRV32E64) {
4165       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
4166       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
4167     } else {
4168       SplatZero =
4169           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, SplatZero,
4170                       DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
4171       SplatTrueVal =
4172           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, SplatTrueVal,
4173                       DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
4174     }
4175 
4176     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
4177   }
4178 
4179   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4180   MVT I1ContainerVT =
4181       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4182 
4183   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
4184 
4185   SDValue Mask, VL;
4186   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4187 
4188   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
4189   SplatTrueVal =
4190       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
4191   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
4192                                SplatTrueVal, SplatZero, VL);
4193 
4194   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
4195 }
4196 
4197 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
4198     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
4199   MVT ExtVT = Op.getSimpleValueType();
4200   // Only custom-lower extensions from fixed-length vector types.
4201   if (!ExtVT.isFixedLengthVector())
4202     return Op;
4203   MVT VT = Op.getOperand(0).getSimpleValueType();
4204   // Grab the canonical container type for the extended type. Infer the smaller
4205   // type from that to ensure the same number of vector elements, as we know
4206   // the LMUL will be sufficient to hold the smaller type.
4207   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Construct the container type for the narrower source type manually so
  // that the source and destination have the same number of vector elements.
4210   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
4211                                      ContainerExtVT.getVectorElementCount());
4212 
4213   SDValue Op1 =
4214       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4215 
4216   SDLoc DL(Op);
4217   SDValue Mask, VL;
4218   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4219 
4220   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
4221 
4222   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
4223 }
4224 
4225 // Custom-lower truncations from vectors to mask vectors by using a mask and a
4226 // setcc operation:
4227 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
4228 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
4229                                                   SelectionDAG &DAG) const {
4230   SDLoc DL(Op);
4231   EVT MaskVT = Op.getValueType();
4232   // Only expect to custom-lower truncations to mask types
4233   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
4234          "Unexpected type for vector mask lowering");
4235   SDValue Src = Op.getOperand(0);
4236   MVT VecVT = Src.getSimpleValueType();
4237 
4238   // If this is a fixed vector, we need to convert it to a scalable vector.
4239   MVT ContainerVT = VecVT;
4240   if (VecVT.isFixedLengthVector()) {
4241     ContainerVT = getContainerForFixedLengthVector(VecVT);
4242     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4243   }
4244 
4245   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
4246   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4247 
4248   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
4249   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
4250 
4251   if (VecVT.isScalableVector()) {
4252     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
4253     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
4254   }
4255 
4256   SDValue Mask, VL;
4257   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4258 
4259   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4260   SDValue Trunc =
4261       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
4262   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
4263                       DAG.getCondCode(ISD::SETNE), Mask, VL);
4264   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
4265 }
4266 
4267 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
4268 // first position of a vector, and that vector is slid up to the insert index.
4269 // By limiting the active vector length to index+1 and merging with the
4270 // original vector (with an undisturbed tail policy for elements >= VL), we
4271 // achieve the desired result of leaving all elements untouched except the one
4272 // at VL-1, which is replaced with the desired value.
4273 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4274                                                     SelectionDAG &DAG) const {
4275   SDLoc DL(Op);
4276   MVT VecVT = Op.getSimpleValueType();
4277   SDValue Vec = Op.getOperand(0);
4278   SDValue Val = Op.getOperand(1);
4279   SDValue Idx = Op.getOperand(2);
4280 
4281   if (VecVT.getVectorElementType() == MVT::i1) {
4282     // FIXME: For now we just promote to an i8 vector and insert into that,
4283     // but this is probably not optimal.
4284     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4285     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4286     Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
4287     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
4288   }
4289 
4290   MVT ContainerVT = VecVT;
4291   // If the operand is a fixed-length vector, convert to a scalable one.
4292   if (VecVT.isFixedLengthVector()) {
4293     ContainerVT = getContainerForFixedLengthVector(VecVT);
4294     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4295   }
4296 
4297   MVT XLenVT = Subtarget.getXLenVT();
4298 
4299   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4300   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
4301   // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the value is a sign-extended 32-bit constant, since the
  // upper 32 bits are then recoverable by sign-extending the lower 32 bits.
4304   // TODO: We could also catch sign extensions of a 32-bit value.
4305   if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
4306     const auto *CVal = cast<ConstantSDNode>(Val);
4307     if (isInt<32>(CVal->getSExtValue())) {
4308       IsLegalInsert = true;
4309       Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
4310     }
4311   }
4312 
4313   SDValue Mask, VL;
4314   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4315 
4316   SDValue ValInVec;
4317 
4318   if (IsLegalInsert) {
4319     unsigned Opc =
4320         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4321     if (isNullConstant(Idx)) {
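      // Insertion at element 0 needs no slide; merge the scalar directly into
      // the source vector with vmv.s.x/vfmv.s.f.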
4322       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
4323       if (!VecVT.isFixedLengthVector())
4324         return Vec;
4325       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
4326     }
4327     ValInVec =
4328         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
4329   } else {
4330     // On RV32, i64-element vectors must be specially handled to place the
4331     // value at element 0, by using two vslide1up instructions in sequence on
4332     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
4333     // this.
4334     SDValue One = DAG.getConstant(1, DL, XLenVT);
4335     SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
4336     SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
4337     MVT I32ContainerVT =
4338         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
4339     SDValue I32Mask =
4340         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
4341     // Limit the active VL to two.
4342     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
4344     // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
4345     ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
4346                            InsertI64VL);
4347     // First slide in the hi value, then the lo in underneath it.
4348     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
4349                            ValHi, I32Mask, InsertI64VL);
4350     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
4351                            ValLo, I32Mask, InsertI64VL);
4352     // Bitcast back to the right container type.
4353     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
4354   }
4355 
4356   // Now that the value is in a vector, slide it into position.
4357   SDValue InsertVL =
4358       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
4359   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
4360                                 ValInVec, Idx, Mask, InsertVL);
4361   if (!VecVT.isFixedLengthVector())
4362     return Slideup;
4363   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
4364 }
4365 
4366 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
4367 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
4368 // types this is done using VMV_X_S to allow us to glean information about the
4369 // sign bits of the result.
4370 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4371                                                      SelectionDAG &DAG) const {
4372   SDLoc DL(Op);
4373   SDValue Idx = Op.getOperand(1);
4374   SDValue Vec = Op.getOperand(0);
4375   EVT EltVT = Op.getValueType();
4376   MVT VecVT = Vec.getSimpleValueType();
4377   MVT XLenVT = Subtarget.getXLenVT();
4378 
4379   if (VecVT.getVectorElementType() == MVT::i1) {
4380     if (VecVT.isFixedLengthVector()) {
4381       unsigned NumElts = VecVT.getVectorNumElements();
4382       if (NumElts >= 8) {
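        // Bitcast the mask to a vector of wider integer elements, extract the
        // element containing the requested bit, then shift and mask that bit
        // out in a GPR.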
4383         MVT WideEltVT;
4384         unsigned WidenVecLen;
4385         SDValue ExtractElementIdx;
4386         SDValue ExtractBitIdx;
4387         unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
4388         MVT LargestEltVT = MVT::getIntegerVT(
4389             std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
4390         if (NumElts <= LargestEltVT.getSizeInBits()) {
4391           assert(isPowerOf2_32(NumElts) &&
4392                  "the number of elements should be power of 2");
4393           WideEltVT = MVT::getIntegerVT(NumElts);
4394           WidenVecLen = 1;
4395           ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
4396           ExtractBitIdx = Idx;
4397         } else {
4398           WideEltVT = LargestEltVT;
4399           WidenVecLen = NumElts / WideEltVT.getSizeInBits();
4400           // extract element index = index / element width
4401           ExtractElementIdx = DAG.getNode(
4402               ISD::SRL, DL, XLenVT, Idx,
4403               DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
4404           // mask bit index = index % element width
4405           ExtractBitIdx = DAG.getNode(
4406               ISD::AND, DL, XLenVT, Idx,
4407               DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
4408         }
4409         MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
4410         Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
4411         SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
4412                                          Vec, ExtractElementIdx);
4413         // Extract the bit from GPR.
4414         SDValue ShiftRight =
4415             DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
4416         return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
4417                            DAG.getConstant(1, DL, XLenVT));
4418       }
4419     }
4420     // Otherwise, promote to an i8 vector and extract from that.
4421     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4422     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4423     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
4424   }
4425 
4426   // If this is a fixed vector, we need to convert it to a scalable vector.
4427   MVT ContainerVT = VecVT;
4428   if (VecVT.isFixedLengthVector()) {
4429     ContainerVT = getContainerForFixedLengthVector(VecVT);
4430     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4431   }
4432 
4433   // If the index is 0, the vector is already in the right position.
4434   if (!isNullConstant(Idx)) {
4435     // Use a VL of 1 to avoid processing more elements than we need.
4436     SDValue VL = DAG.getConstant(1, DL, XLenVT);
4437     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4438     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4439     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4440                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
4441   }
4442 
4443   if (!EltVT.isInteger()) {
4444     // Floating-point extracts are handled in TableGen.
4445     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
4446                        DAG.getConstant(0, DL, XLenVT));
4447   }
4448 
4449   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4450   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
4451 }
4452 
4453 // Some RVV intrinsics may claim that they want an integer operand to be
4454 // promoted or expanded.
4455 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
4456                                           const RISCVSubtarget &Subtarget) {
4457   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4458           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
4459          "Unexpected opcode");
4460 
4461   if (!Subtarget.hasVInstructions())
4462     return SDValue();
4463 
4464   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
4465   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
4466   SDLoc DL(Op);
4467 
4468   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
4469       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
4470   if (!II || !II->hasSplatOperand())
4471     return SDValue();
4472 
4473   unsigned SplatOp = II->SplatOperand + 1 + HasChain;
4474   assert(SplatOp < Op.getNumOperands());
4475 
4476   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
4477   SDValue &ScalarOp = Operands[SplatOp];
4478   MVT OpVT = ScalarOp.getSimpleValueType();
4479   MVT XLenVT = Subtarget.getXLenVT();
4480 
4481   // If this isn't a scalar, or its type is XLenVT we're done.
4482   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
4483     return SDValue();
4484 
4485   // Simplest case is that the operand needs to be promoted to XLenVT.
4486   if (OpVT.bitsLT(XLenVT)) {
4487     // If the operand is a constant, sign extend to increase our chances
4488     // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
4490     // FIXME: Should we ignore the upper bits in isel instead?
4491     unsigned ExtOpc =
4492         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4493     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
4494     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4495   }
4496 
4497   // Use the previous operand to get the vXi64 VT. The result might be a mask
4498   // VT for compares. Using the previous operand assumes that the previous
4499   // operand will never have a smaller element size than a scalar operand and
4500   // that a widening operation never uses SEW=64.
4501   // NOTE: If this fails the below assert, we can probably just find the
4502   // element count from any operand or result and use it to construct the VT.
4503   assert(II->SplatOperand > 0 && "Unexpected splat operand!");
4504   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
4505 
4506   // The more complex case is when the scalar is larger than XLenVT.
4507   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
4508          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
4509 
4510   // If this is a sign-extended 32-bit constant, we can truncate it and rely
4511   // on the instruction to sign-extend since SEW>XLEN.
4512   if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
4513     if (isInt<32>(CVal->getSExtValue())) {
4514       ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
4515       return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4516     }
4517   }
4518 
4519   // We need to convert the scalar to a splat vector.
4520   // FIXME: Can we implicitly truncate the scalar if it is known to
4521   // be sign extended?
4522   SDValue VL = getVLOperand(Op);
4523   assert(VL.getValueType() == XLenVT);
4524   ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
4525   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4526 }
4527 
4528 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4529                                                      SelectionDAG &DAG) const {
4530   unsigned IntNo = Op.getConstantOperandVal(0);
4531   SDLoc DL(Op);
4532   MVT XLenVT = Subtarget.getXLenVT();
4533 
4534   switch (IntNo) {
4535   default:
4536     break; // Don't custom lower most intrinsics.
4537   case Intrinsic::thread_pointer: {
4538     EVT PtrVT = getPointerTy(DAG.getDataLayout());
4539     return DAG.getRegister(RISCV::X4, PtrVT);
4540   }
4541   case Intrinsic::riscv_orc_b:
4542   case Intrinsic::riscv_brev8: {
4543     // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
4544     unsigned Opc =
4545         IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
4546     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
4547                        DAG.getConstant(7, DL, XLenVT));
4548   }
4549   case Intrinsic::riscv_grev:
4550   case Intrinsic::riscv_gorc: {
4551     unsigned Opc =
4552         IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
4553     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4554   }
4555   case Intrinsic::riscv_zip:
4556   case Intrinsic::riscv_unzip: {
4557     // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
    // For i32 the immediate is 15. For i64 the immediate is 31.
4559     unsigned Opc =
4560         IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4561     unsigned BitWidth = Op.getValueSizeInBits();
4562     assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
4563     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
4564                        DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
4565   }
4566   case Intrinsic::riscv_shfl:
4567   case Intrinsic::riscv_unshfl: {
4568     unsigned Opc =
4569         IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4570     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4571   }
4572   case Intrinsic::riscv_bcompress:
4573   case Intrinsic::riscv_bdecompress: {
4574     unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
4575                                                        : RISCVISD::BDECOMPRESS;
4576     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4577   }
4578   case Intrinsic::riscv_bfp:
4579     return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
4580                        Op.getOperand(2));
4581   case Intrinsic::riscv_fsl:
4582     return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
4583                        Op.getOperand(2), Op.getOperand(3));
4584   case Intrinsic::riscv_fsr:
4585     return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
4586                        Op.getOperand(2), Op.getOperand(3));
4587   case Intrinsic::riscv_vmv_x_s:
4588     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
4589     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
4590                        Op.getOperand(1));
4591   case Intrinsic::riscv_vmv_v_x:
4592     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
4593                             Op.getSimpleValueType(), DL, DAG, Subtarget);
4594   case Intrinsic::riscv_vfmv_v_f:
4595     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
4596                        Op.getOperand(1), Op.getOperand(2));
4597   case Intrinsic::riscv_vmv_s_x: {
4598     SDValue Scalar = Op.getOperand(2);
4599 
4600     if (Scalar.getValueType().bitsLE(XLenVT)) {
4601       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
4602       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
4603                          Op.getOperand(1), Scalar, Op.getOperand(3));
4604     }
4605 
4606     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
4607 
4608     // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
4611     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
4612     // to merge element 0 from our splat into the source vector.
4613     // FIXME: This is probably not the best way to do this, but it is
4614     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
4615     // point.
4616     //   sw lo, (a0)
4617     //   sw hi, 4(a0)
4618     //   vlse vX, (a0)
4619     //
4620     //   vid.v      vVid
4621     //   vmseq.vx   mMask, vVid, 0
4622     //   vmerge.vvm vDest, vSrc, vVal, mMask
4623     MVT VT = Op.getSimpleValueType();
4624     SDValue Vec = Op.getOperand(1);
4625     SDValue VL = getVLOperand(Op);
4626 
4627     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
4628     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
4629                                       DAG.getConstant(0, DL, MVT::i32), VL);
4630 
4631     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
4632     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4633     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
4634     SDValue SelectCond =
4635         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
4636                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
4637     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
4638                        Vec, VL);
4639   }
4640   case Intrinsic::riscv_vslide1up:
4641   case Intrinsic::riscv_vslide1down:
4642   case Intrinsic::riscv_vslide1up_mask:
4643   case Intrinsic::riscv_vslide1down_mask: {
4644     // We need to special case these when the scalar is larger than XLen.
4645     unsigned NumOps = Op.getNumOperands();
4646     bool IsMasked = NumOps == 7;
4647     unsigned OpOffset = IsMasked ? 1 : 0;
4648     SDValue Scalar = Op.getOperand(2 + OpOffset);
4649     if (Scalar.getValueType().bitsLE(XLenVT))
4650       break;
4651 
4652     // Splatting a sign extended constant is fine.
4653     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
4654       if (isInt<32>(CVal->getSExtValue()))
4655         break;
4656 
4657     MVT VT = Op.getSimpleValueType();
4658     assert(VT.getVectorElementType() == MVT::i64 &&
4659            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
4660 
4661     // Convert the vector source to the equivalent nxvXi32 vector.
4662     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4663     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
4664 
4665     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4666                                    DAG.getConstant(0, DL, XLenVT));
4667     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4668                                    DAG.getConstant(1, DL, XLenVT));
4669 
4670     // Double the VL since we halved SEW.
4671     SDValue VL = getVLOperand(Op);
4672     SDValue I32VL =
4673         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
4674 
4675     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
4676     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
4677 
4678     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
4679     // instructions.
4680     if (IntNo == Intrinsic::riscv_vslide1up ||
4681         IntNo == Intrinsic::riscv_vslide1up_mask) {
4682       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
4683                         I32Mask, I32VL);
4684       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
4685                         I32Mask, I32VL);
4686     } else {
4687       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
4688                         I32Mask, I32VL);
4689       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
4690                         I32Mask, I32VL);
4691     }
4692 
4693     // Convert back to nxvXi64.
4694     Vec = DAG.getBitcast(VT, Vec);
4695 
4696     if (!IsMasked)
4697       return Vec;
4698 
4699     // Apply mask after the operation.
4700     SDValue Mask = Op.getOperand(NumOps - 3);
4701     SDValue MaskedOff = Op.getOperand(1);
4702     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
4703   }
4704   }
4705 
4706   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4707 }
4708 
4709 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
4710                                                     SelectionDAG &DAG) const {
4711   unsigned IntNo = Op.getConstantOperandVal(1);
4712   switch (IntNo) {
4713   default:
4714     break;
4715   case Intrinsic::riscv_masked_strided_load: {
4716     SDLoc DL(Op);
4717     MVT XLenVT = Subtarget.getXLenVT();
4718 
4719     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
4720     // the selection of the masked intrinsics doesn't do this for us.
4721     SDValue Mask = Op.getOperand(5);
4722     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4723 
4724     MVT VT = Op->getSimpleValueType(0);
4725     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4726 
4727     SDValue PassThru = Op.getOperand(2);
4728     if (!IsUnmasked) {
4729       MVT MaskVT =
4730           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4731       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4732       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4733     }
4734 
4735     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4736 
4737     SDValue IntID = DAG.getTargetConstant(
4738         IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
4739         XLenVT);
4740 
4741     auto *Load = cast<MemIntrinsicSDNode>(Op);
4742     SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
4743     if (IsUnmasked)
4744       Ops.push_back(DAG.getUNDEF(ContainerVT));
4745     else
4746       Ops.push_back(PassThru);
4747     Ops.push_back(Op.getOperand(3)); // Ptr
4748     Ops.push_back(Op.getOperand(4)); // Stride
4749     if (!IsUnmasked)
4750       Ops.push_back(Mask);
4751     Ops.push_back(VL);
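    // The masked form takes an explicit policy operand. Tail-agnostic is fine
    // here since VL covers every element of the original fixed-length vector.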
4752     if (!IsUnmasked) {
      SDValue Policy =
          DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
4754       Ops.push_back(Policy);
4755     }
4756 
4757     SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4758     SDValue Result =
4759         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
4760                                 Load->getMemoryVT(), Load->getMemOperand());
4761     SDValue Chain = Result.getValue(1);
4762     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4763     return DAG.getMergeValues({Result, Chain}, DL);
4764   }
4765   }
4766 
4767   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4768 }
4769 
4770 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
4771                                                  SelectionDAG &DAG) const {
4772   unsigned IntNo = Op.getConstantOperandVal(1);
4773   switch (IntNo) {
4774   default:
4775     break;
4776   case Intrinsic::riscv_masked_strided_store: {
4777     SDLoc DL(Op);
4778     MVT XLenVT = Subtarget.getXLenVT();
4779 
4780     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
4781     // the selection of the masked intrinsics doesn't do this for us.
4782     SDValue Mask = Op.getOperand(5);
4783     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4784 
4785     SDValue Val = Op.getOperand(2);
4786     MVT VT = Val.getSimpleValueType();
4787     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4788 
4789     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
4790     if (!IsUnmasked) {
4791       MVT MaskVT =
4792           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4793       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4794     }
4795 
4796     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4797 
4798     SDValue IntID = DAG.getTargetConstant(
4799         IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
4800         XLenVT);
4801 
4802     auto *Store = cast<MemIntrinsicSDNode>(Op);
4803     SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
4804     Ops.push_back(Val);
4805     Ops.push_back(Op.getOperand(3)); // Ptr
4806     Ops.push_back(Op.getOperand(4)); // Stride
4807     if (!IsUnmasked)
4808       Ops.push_back(Mask);
4809     Ops.push_back(VL);
4810 
4811     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
4812                                    Ops, Store->getMemoryVT(),
4813                                    Store->getMemOperand());
4814   }
4815   }
4816 
4817   return SDValue();
4818 }
4819 
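// Return the scalable vector type with the same element type as VT that fills
// exactly one vector register, i.e. LMUL=1.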
4820 static MVT getLMUL1VT(MVT VT) {
4821   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
4822          "Unexpected vector MVT");
4823   return MVT::getScalableVectorVT(
4824       VT.getVectorElementType(),
4825       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
4826 }
4827 
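// Map an ISD::VECREDUCE_* opcode to the corresponding VL-predicated RISCVISD
// reduction opcode.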
4828 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
4829   switch (ISDOpcode) {
4830   default:
4831     llvm_unreachable("Unhandled reduction");
4832   case ISD::VECREDUCE_ADD:
4833     return RISCVISD::VECREDUCE_ADD_VL;
4834   case ISD::VECREDUCE_UMAX:
4835     return RISCVISD::VECREDUCE_UMAX_VL;
4836   case ISD::VECREDUCE_SMAX:
4837     return RISCVISD::VECREDUCE_SMAX_VL;
4838   case ISD::VECREDUCE_UMIN:
4839     return RISCVISD::VECREDUCE_UMIN_VL;
4840   case ISD::VECREDUCE_SMIN:
4841     return RISCVISD::VECREDUCE_SMIN_VL;
4842   case ISD::VECREDUCE_AND:
4843     return RISCVISD::VECREDUCE_AND_VL;
4844   case ISD::VECREDUCE_OR:
4845     return RISCVISD::VECREDUCE_OR_VL;
4846   case ISD::VECREDUCE_XOR:
4847     return RISCVISD::VECREDUCE_XOR_VL;
4848   }
4849 }
4850 
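// Lower reductions of vXi1 mask vectors to a vcpop.m-based population count
// followed by a scalar compare against zero.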
4851 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
4852                                                          SelectionDAG &DAG,
4853                                                          bool IsVP) const {
4854   SDLoc DL(Op);
4855   SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
4856   MVT VecVT = Vec.getSimpleValueType();
4857   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
4858           Op.getOpcode() == ISD::VECREDUCE_OR ||
4859           Op.getOpcode() == ISD::VECREDUCE_XOR ||
4860           Op.getOpcode() == ISD::VP_REDUCE_AND ||
4861           Op.getOpcode() == ISD::VP_REDUCE_OR ||
4862           Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
4863          "Unexpected reduction lowering");
4864 
4865   MVT XLenVT = Subtarget.getXLenVT();
4866   assert(Op.getValueType() == XLenVT &&
4867          "Expected reduction output to be legalized to XLenVT");
4868 
4869   MVT ContainerVT = VecVT;
4870   if (VecVT.isFixedLengthVector()) {
4871     ContainerVT = getContainerForFixedLengthVector(VecVT);
4872     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4873   }
4874 
4875   SDValue Mask, VL;
4876   if (IsVP) {
4877     Mask = Op.getOperand(2);
4878     VL = Op.getOperand(3);
4879   } else {
4880     std::tie(Mask, VL) =
4881         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4882   }
4883 
4884   unsigned BaseOpc;
4885   ISD::CondCode CC;
4886   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4887 
4888   switch (Op.getOpcode()) {
4889   default:
4890     llvm_unreachable("Unhandled reduction");
4891   case ISD::VECREDUCE_AND:
4892   case ISD::VP_REDUCE_AND: {
4893     // vcpop ~x == 0
4894     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4895     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
4896     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4897     CC = ISD::SETEQ;
4898     BaseOpc = ISD::AND;
4899     break;
4900   }
4901   case ISD::VECREDUCE_OR:
4902   case ISD::VP_REDUCE_OR:
4903     // vcpop x != 0
4904     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4905     CC = ISD::SETNE;
4906     BaseOpc = ISD::OR;
4907     break;
4908   case ISD::VECREDUCE_XOR:
4909   case ISD::VP_REDUCE_XOR: {
4910     // ((vcpop x) & 1) != 0
4911     SDValue One = DAG.getConstant(1, DL, XLenVT);
4912     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4913     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
4914     CC = ISD::SETNE;
4915     BaseOpc = ISD::XOR;
4916     break;
4917   }
4918   }
4919 
4920   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
4921 
4922   if (!IsVP)
4923     return SetCC;
4924 
4925   // Now include the start value in the operation.
4926   // Note that we must return the start value when no elements are operated
4927   // upon. The vcpop instructions we've emitted in each case above will return
4928   // 0 for an inactive vector, and so we've already received the neutral value:
4929   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
4930   // can simply include the start value.
4931   return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
4932 }
4933 
4934 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
4935                                             SelectionDAG &DAG) const {
4936   SDLoc DL(Op);
4937   SDValue Vec = Op.getOperand(0);
4938   EVT VecEVT = Vec.getValueType();
4939 
4940   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
4941 
4942   // Due to ordering in legalize types we may have a vector type that needs to
4943   // be split. Do that manually so we can get down to a legal type.
4944   while (getTypeAction(*DAG.getContext(), VecEVT) ==
4945          TargetLowering::TypeSplitVector) {
4946     SDValue Lo, Hi;
4947     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
4948     VecEVT = Lo.getValueType();
4949     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
4950   }
4951 
4952   // TODO: The type may need to be widened rather than split. Or widened before
4953   // it can be split.
4954   if (!isTypeLegal(VecEVT))
4955     return SDValue();
4956 
4957   MVT VecVT = VecEVT.getSimpleVT();
4958   MVT VecEltVT = VecVT.getVectorElementType();
4959   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
4960 
4961   MVT ContainerVT = VecVT;
4962   if (VecVT.isFixedLengthVector()) {
4963     ContainerVT = getContainerForFixedLengthVector(VecVT);
4964     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4965   }
4966 
4967   MVT M1VT = getLMUL1VT(ContainerVT);
4968   MVT XLenVT = Subtarget.getXLenVT();
4969 
4970   SDValue Mask, VL;
4971   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4972 
4973   SDValue NeutralElem =
4974       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
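  // The start value is the neutral element for BaseOpc (e.g. 0 for ADD,
  // all-ones for AND). The reduction only reads element 0 of the start-value
  // vector, so splatting it with a VL of 1 is sufficient.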
4975   SDValue IdentitySplat = lowerScalarSplat(
4976       NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
4977   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
4978                                   IdentitySplat, Mask, VL);
4979   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
4980                              DAG.getConstant(0, DL, XLenVT));
4981   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
4982 }
4983 
4984 // Given a reduction op, this function returns the matching reduction opcode,
4985 // the vector SDValue and the scalar SDValue required to lower this to a
4986 // RISCVISD node.
4987 static std::tuple<unsigned, SDValue, SDValue>
4988 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
4989   SDLoc DL(Op);
4990   auto Flags = Op->getFlags();
4991   unsigned Opcode = Op.getOpcode();
4992   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
4993   switch (Opcode) {
4994   default:
4995     llvm_unreachable("Unhandled reduction");
4996   case ISD::VECREDUCE_FADD: {
4997     // Use positive zero if we can. It is cheaper to materialize.
4998     SDValue Zero =
4999         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
5000     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
5001   }
5002   case ISD::VECREDUCE_SEQ_FADD:
5003     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
5004                            Op.getOperand(0));
5005   case ISD::VECREDUCE_FMIN:
5006     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
5007                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5008   case ISD::VECREDUCE_FMAX:
5009     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
5010                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5011   }
5012 }
5013 
5014 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
5015                                               SelectionDAG &DAG) const {
5016   SDLoc DL(Op);
5017   MVT VecEltVT = Op.getSimpleValueType();
5018 
5019   unsigned RVVOpcode;
5020   SDValue VectorVal, ScalarVal;
5021   std::tie(RVVOpcode, VectorVal, ScalarVal) =
5022       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
5023   MVT VecVT = VectorVal.getSimpleValueType();
5024 
5025   MVT ContainerVT = VecVT;
5026   if (VecVT.isFixedLengthVector()) {
5027     ContainerVT = getContainerForFixedLengthVector(VecVT);
5028     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
5029   }
5030 
5031   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
5032   MVT XLenVT = Subtarget.getXLenVT();
5033 
5034   SDValue Mask, VL;
5035   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
5036 
5037   SDValue ScalarSplat = lowerScalarSplat(
5038       ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
5039   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
5040                                   VectorVal, ScalarSplat, Mask, VL);
5041   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
5042                      DAG.getConstant(0, DL, XLenVT));
5043 }
5044 
5045 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
5046   switch (ISDOpcode) {
5047   default:
5048     llvm_unreachable("Unhandled reduction");
5049   case ISD::VP_REDUCE_ADD:
5050     return RISCVISD::VECREDUCE_ADD_VL;
5051   case ISD::VP_REDUCE_UMAX:
5052     return RISCVISD::VECREDUCE_UMAX_VL;
5053   case ISD::VP_REDUCE_SMAX:
5054     return RISCVISD::VECREDUCE_SMAX_VL;
5055   case ISD::VP_REDUCE_UMIN:
5056     return RISCVISD::VECREDUCE_UMIN_VL;
5057   case ISD::VP_REDUCE_SMIN:
5058     return RISCVISD::VECREDUCE_SMIN_VL;
5059   case ISD::VP_REDUCE_AND:
5060     return RISCVISD::VECREDUCE_AND_VL;
5061   case ISD::VP_REDUCE_OR:
5062     return RISCVISD::VECREDUCE_OR_VL;
5063   case ISD::VP_REDUCE_XOR:
5064     return RISCVISD::VECREDUCE_XOR_VL;
5065   case ISD::VP_REDUCE_FADD:
5066     return RISCVISD::VECREDUCE_FADD_VL;
5067   case ISD::VP_REDUCE_SEQ_FADD:
5068     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
5069   case ISD::VP_REDUCE_FMAX:
5070     return RISCVISD::VECREDUCE_FMAX_VL;
5071   case ISD::VP_REDUCE_FMIN:
5072     return RISCVISD::VECREDUCE_FMIN_VL;
5073   }
5074 }
5075 
5076 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
5077                                            SelectionDAG &DAG) const {
5078   SDLoc DL(Op);
5079   SDValue Vec = Op.getOperand(1);
5080   EVT VecEVT = Vec.getValueType();
5081 
5082   // TODO: The type may need to be widened rather than split. Or widened before
5083   // it can be split.
5084   if (!isTypeLegal(VecEVT))
5085     return SDValue();
5086 
5087   MVT VecVT = VecEVT.getSimpleVT();
5088   MVT VecEltVT = VecVT.getVectorElementType();
5089   unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
5090 
5091   MVT ContainerVT = VecVT;
5092   if (VecVT.isFixedLengthVector()) {
5093     ContainerVT = getContainerForFixedLengthVector(VecVT);
5094     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5095   }
5096 
5097   SDValue VL = Op.getOperand(3);
5098   SDValue Mask = Op.getOperand(2);
5099 
5100   MVT M1VT = getLMUL1VT(ContainerVT);
5101   MVT XLenVT = Subtarget.getXLenVT();
5102   MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
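  // Narrow integer elements are extracted as XLenVT and truncated or
  // sign-extended to the result type below; FP and XLEN-or-wider integer
  // elements are extracted directly in the element type.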
5103 
5104   SDValue StartSplat =
5105       lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
5106                        DL, DAG, Subtarget);
5107   SDValue Reduction =
5108       DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
5109   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
5110                              DAG.getConstant(0, DL, XLenVT));
5111   if (!VecVT.isInteger())
5112     return Elt0;
5113   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
5114 }
5115 
5116 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
5117                                                    SelectionDAG &DAG) const {
5118   SDValue Vec = Op.getOperand(0);
5119   SDValue SubVec = Op.getOperand(1);
5120   MVT VecVT = Vec.getSimpleValueType();
5121   MVT SubVecVT = SubVec.getSimpleValueType();
5122 
5123   SDLoc DL(Op);
5124   MVT XLenVT = Subtarget.getXLenVT();
5125   unsigned OrigIdx = Op.getConstantOperandVal(2);
5126   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5127 
5128   // We don't have the ability to slide mask vectors up indexed by their i1
5129   // elements; the smallest we can do is i8. Often we are able to bitcast to
5130   // equivalent i8 vectors. Note that when inserting a fixed-length vector
5131   // into a scalable one, we might not necessarily have enough scalable
5132   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
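  // For example, inserting v16i1 at index 8 into nxv64i1 can instead be done
  // as inserting v2i8 at index 1 into nxv8i8.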
5133   if (SubVecVT.getVectorElementType() == MVT::i1 &&
5134       (OrigIdx != 0 || !Vec.isUndef())) {
5135     if (VecVT.getVectorMinNumElements() >= 8 &&
5136         SubVecVT.getVectorMinNumElements() >= 8) {
5137       assert(OrigIdx % 8 == 0 && "Invalid index");
5138       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
5139              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
5140              "Unexpected mask vector lowering");
5141       OrigIdx /= 8;
5142       SubVecVT =
5143           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
5144                            SubVecVT.isScalableVector());
5145       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
5146                                VecVT.isScalableVector());
5147       Vec = DAG.getBitcast(VecVT, Vec);
5148       SubVec = DAG.getBitcast(SubVecVT, SubVec);
5149     } else {
5150       // We can't slide this mask vector up indexed by its i1 elements.
5151       // This poses a problem when we wish to insert a scalable vector which
5152       // can't be re-expressed as a larger type. Just choose the slow path and
5153       // extend to a larger type, then truncate back down.
5154       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
5155       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
5156       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
5157       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
5158       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
5159                         Op.getOperand(2));
5160       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
5161       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
5162     }
5163   }
5164 
  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of an
  // LMUL group contains the specific subvector as we only know the minimum
5168   // register size. Therefore we must slide the vector group up the full
5169   // amount.
5170   if (SubVecVT.isFixedLengthVector()) {
5171     if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
5172       return Op;
5173     MVT ContainerVT = VecVT;
5174     if (VecVT.isFixedLengthVector()) {
5175       ContainerVT = getContainerForFixedLengthVector(VecVT);
5176       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5177     }
5178     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
5179                          DAG.getUNDEF(ContainerVT), SubVec,
5180                          DAG.getConstant(0, DL, XLenVT));
5181     if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
5182       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
5183       return DAG.getBitcast(Op.getValueType(), SubVec);
5184     }
5185     SDValue Mask =
5186         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5187     // Set the vector length to only the number of elements we care about. Note
5188     // that for slideup this includes the offset.
5189     SDValue VL =
5190         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
5191     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5192     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
5193                                   SubVec, SlideupAmt, Mask, VL);
5194     if (VecVT.isFixedLengthVector())
5195       Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
5196     return DAG.getBitcast(Op.getValueType(), Slideup);
5197   }
5198 
5199   unsigned SubRegIdx, RemIdx;
5200   std::tie(SubRegIdx, RemIdx) =
5201       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5202           VecVT, SubVecVT, OrigIdx, TRI);
5203 
5204   RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
5205   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
5206                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
5207                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
5208 
5209   // 1. If the Idx has been completely eliminated and this subvector's size is
5210   // a vector register or a multiple thereof, or the surrounding elements are
5211   // undef, then this is a subvector insert which naturally aligns to a vector
5212   // register. These can easily be handled using subregister manipulation.
5213   // 2. If the subvector is smaller than a vector register, then the insertion
5214   // must preserve the undisturbed elements of the register. We do this by
5215   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
5216   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
5217   // subvector within the vector register, and an INSERT_SUBVECTOR of that
5218   // LMUL=1 type back into the larger vector (resolving to another subregister
5219   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
5220   // to avoid allocating a large register group to hold our subvector.
5221   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
5222     return Op;
5223 
  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
5226   // (in our case undisturbed). This means we can set up a subvector insertion
5227   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
5228   // size of the subvector.
5229   MVT InterSubVT = VecVT;
5230   SDValue AlignedExtract = Vec;
5231   unsigned AlignedIdx = OrigIdx - RemIdx;
5232   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5233     InterSubVT = getLMUL1VT(VecVT);
5234     // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to an EXTRACT_SUBREG instruction.
5236     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5237                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
5238   }
5239 
5240   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5241   // For scalable vectors this must be further multiplied by vscale.
5242   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
5243 
5244   SDValue Mask, VL;
5245   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5246 
5247   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
5248   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
5249   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
5250   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
5251 
5252   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
5253                        DAG.getUNDEF(InterSubVT), SubVec,
5254                        DAG.getConstant(0, DL, XLenVT));
5255 
5256   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
5257                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
5258 
5259   // If required, insert this subvector back into the correct vector register.
5260   // This should resolve to an INSERT_SUBREG instruction.
5261   if (VecVT.bitsGT(InterSubVT))
5262     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
5263                           DAG.getConstant(AlignedIdx, DL, XLenVT));
5264 
5265   // We might have bitcast from a mask type: cast back to the original type if
5266   // required.
5267   return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
5268 }
5269 
5270 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
5271                                                     SelectionDAG &DAG) const {
5272   SDValue Vec = Op.getOperand(0);
5273   MVT SubVecVT = Op.getSimpleValueType();
5274   MVT VecVT = Vec.getSimpleValueType();
5275 
5276   SDLoc DL(Op);
5277   MVT XLenVT = Subtarget.getXLenVT();
5278   unsigned OrigIdx = Op.getConstantOperandVal(1);
5279   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5280 
5281   // We don't have the ability to slide mask vectors down indexed by their i1
5282   // elements; the smallest we can do is i8. Often we are able to bitcast to
5283   // equivalent i8 vectors. Note that when extracting a fixed-length vector
5284   // from a scalable one, we might not necessarily have enough scalable
5285   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
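  // For example, extracting v8i1 at index 8 from nxv64i1 can instead be done
  // as extracting v1i8 at index 1 from nxv8i8.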
5286   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
5287     if (VecVT.getVectorMinNumElements() >= 8 &&
5288         SubVecVT.getVectorMinNumElements() >= 8) {
5289       assert(OrigIdx % 8 == 0 && "Invalid index");
5290       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
5291              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
5292              "Unexpected mask vector lowering");
5293       OrigIdx /= 8;
5294       SubVecVT =
5295           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
5296                            SubVecVT.isScalableVector());
5297       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
5298                                VecVT.isScalableVector());
5299       Vec = DAG.getBitcast(VecVT, Vec);
5300     } else {
      // We can't slide this mask vector down indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain
      // fixed-length vectors from fixed-length vectors, where we can extract
      // as i8 and shift the correct element right to reach the desired
      // subvector.
5308       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
5309       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
5310       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
5311       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
5312                         Op.getOperand(1));
5313       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
5314       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
5315     }
5316   }
5317 
  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of an
  // LMUL group contains the specific subvector as we only know the minimum
5321   // register size. Therefore we must slide the vector group down the full
5322   // amount.
5323   if (SubVecVT.isFixedLengthVector()) {
5324     // With an index of 0 this is a cast-like subvector, which can be performed
5325     // with subregister operations.
5326     if (OrigIdx == 0)
5327       return Op;
5328     MVT ContainerVT = VecVT;
5329     if (VecVT.isFixedLengthVector()) {
5330       ContainerVT = getContainerForFixedLengthVector(VecVT);
5331       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5332     }
5333     SDValue Mask =
5334         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5335     // Set the vector length to only the number of elements we care about. This
5336     // avoids sliding down elements we're going to discard straight away.
5337     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
5338     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5339     SDValue Slidedown =
5340         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
5341                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
5342     // Now we can use a cast-like subvector extract to get the result.
5343     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5344                             DAG.getConstant(0, DL, XLenVT));
5345     return DAG.getBitcast(Op.getValueType(), Slidedown);
5346   }
5347 
5348   unsigned SubRegIdx, RemIdx;
5349   std::tie(SubRegIdx, RemIdx) =
5350       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5351           VecVT, SubVecVT, OrigIdx, TRI);
5352 
5353   // If the Idx has been completely eliminated then this is a subvector extract
5354   // which naturally aligns to a vector register. These can easily be handled
5355   // using subregister manipulation.
5356   if (RemIdx == 0)
5357     return Op;
5358 
5359   // Else we must shift our vector register directly to extract the subvector.
5360   // Do this using VSLIDEDOWN.
5361 
5362   // If the vector type is an LMUL-group type, extract a subvector equal to the
// nearest full vector register type. This should resolve to an EXTRACT_SUBREG
5364   // instruction.
5365   MVT InterSubVT = VecVT;
5366   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5367     InterSubVT = getLMUL1VT(VecVT);
5368     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5369                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
5370   }
5371 
5372   // Slide this vector register down by the desired number of elements in order
5373   // to place the desired subvector starting at element 0.
5374   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5375   // For scalable vectors this must be further multiplied by vscale.
5376   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
5377 
5378   SDValue Mask, VL;
5379   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
5380   SDValue Slidedown =
5381       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
5382                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
5383 
5384   // Now the vector is in the right position, extract our final subvector. This
5385   // should resolve to a COPY.
5386   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5387                           DAG.getConstant(0, DL, XLenVT));
5388 
5389   // We might have bitcast from a mask type: cast back to the original type if
5390   // required.
5391   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
5392 }
5393 
5394 // Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
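// For example, a step of 4 is lowered to vid.v followed by a shift left by
// 2, while a step of 3 is lowered to vid.v followed by a multiply by a splat
// of 3.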
5396 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
5397                                               SelectionDAG &DAG) const {
5398   SDLoc DL(Op);
5399   MVT VT = Op.getSimpleValueType();
5400   MVT XLenVT = Subtarget.getXLenVT();
5401   SDValue Mask, VL;
5402   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
5403   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5404   uint64_t StepValImm = Op.getConstantOperandVal(0);
5405   if (StepValImm != 1) {
5406     if (isPowerOf2_64(StepValImm)) {
5407       SDValue StepVal =
5408           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
5409                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
5410       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
5411     } else {
5412       SDValue StepVal = lowerScalarSplat(
5413           DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
5414           DL, DAG, Subtarget);
5415       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
5416     }
5417   }
5418   return StepVec;
5419 }
5420 
5421 // Implement vector_reverse using vrgather.vv with indices determined by
5422 // subtracting the id of each element from (VLMAX-1). This will convert
5423 // the indices like so:
5424 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
5425 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
5426 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
5427                                                  SelectionDAG &DAG) const {
5428   SDLoc DL(Op);
5429   MVT VecVT = Op.getSimpleValueType();
5430   unsigned EltSize = VecVT.getScalarSizeInBits();
5431   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
5432 
5433   unsigned MaxVLMAX = 0;
5434   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
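  // MaxVLMAX is the largest possible VLMAX for this type: e.g. for nxv8i8
  // (MinSize=64, EltSize=8) a 128-bit upper bound gives (128/8)*64/64 = 16.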
5435   if (VectorBitsMax != 0)
5436     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
5437 
5438   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
5439   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
5440 
5441   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
5442   // to use vrgatherei16.vv.
5443   // TODO: It's also possible to use vrgatherei16.vv for other types to
5444   // decrease register width for the index calculation.
5445   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting, VLMAX no longer requires
    // vrgatherei16.vv.
5450     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
5451       SDValue Lo, Hi;
5452       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
5453       EVT LoVT, HiVT;
5454       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
5455       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
5456       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
5457       // Reassemble the low and high pieces reversed.
5458       // FIXME: This is a CONCAT_VECTORS.
5459       SDValue Res =
5460           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
5461                       DAG.getIntPtrConstant(0, DL));
5462       return DAG.getNode(
5463           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
5464           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
5465     }
5466 
5467     // Just promote the int type to i16 which will double the LMUL.
5468     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
5469     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
5470   }
5471 
5472   MVT XLenVT = Subtarget.getXLenVT();
5473   SDValue Mask, VL;
5474   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5475 
5476   // Calculate VLMAX-1 for the desired SEW.
5477   unsigned MinElts = VecVT.getVectorMinNumElements();
5478   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
5479                               DAG.getConstant(MinElts, DL, XLenVT));
5480   SDValue VLMinus1 =
5481       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
5482 
5483   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
5484   bool IsRV32E64 =
5485       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
5486   SDValue SplatVL;
5487   if (!IsRV32E64)
5488     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
5489   else
5490     SplatVL =
5491         DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, VLMinus1,
5492                     DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
5493 
5494   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
5495   SDValue Indices =
5496       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
5497 
5498   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
5499 }
5500 
5501 SDValue
5502 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
5503                                                      SelectionDAG &DAG) const {
5504   SDLoc DL(Op);
5505   auto *Load = cast<LoadSDNode>(Op);
5506 
5507   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5508                                         Load->getMemoryVT(),
5509                                         *Load->getMemOperand()) &&
5510          "Expecting a correctly-aligned load");
5511 
5512   MVT VT = Op.getSimpleValueType();
5513   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5514 
5515   SDValue VL =
5516       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5517 
5518   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5519   SDValue NewLoad = DAG.getMemIntrinsicNode(
5520       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
5521       Load->getMemoryVT(), Load->getMemOperand());
5522 
5523   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5524   return DAG.getMergeValues({Result, Load->getChain()}, DL);
5525 }
5526 
5527 SDValue
5528 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
5529                                                       SelectionDAG &DAG) const {
5530   SDLoc DL(Op);
5531   auto *Store = cast<StoreSDNode>(Op);
5532 
5533   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5534                                         Store->getMemoryVT(),
5535                                         *Store->getMemOperand()) &&
5536          "Expecting a correctly-aligned store");
5537 
5538   SDValue StoreVal = Store->getValue();
5539   MVT VT = StoreVal.getSimpleValueType();
5540 
  // If the size is less than a byte, we need to pad with zeros to make a byte.
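  // For example, a v4i1 store value is widened by inserting it into a zeroed
  // v8i1 vector at index 0.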
5542   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
5543     VT = MVT::v8i1;
5544     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
5545                            DAG.getConstant(0, DL, VT), StoreVal,
5546                            DAG.getIntPtrConstant(0, DL));
5547   }
5548 
5549   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5550 
5551   SDValue VL =
5552       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5553 
5554   SDValue NewValue =
5555       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
5556   return DAG.getMemIntrinsicNode(
5557       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
5558       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
5559       Store->getMemoryVT(), Store->getMemOperand());
5560 }
5561 
5562 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
5563                                              SelectionDAG &DAG) const {
5564   SDLoc DL(Op);
5565   MVT VT = Op.getSimpleValueType();
5566 
5567   const auto *MemSD = cast<MemSDNode>(Op);
5568   EVT MemVT = MemSD->getMemoryVT();
5569   MachineMemOperand *MMO = MemSD->getMemOperand();
5570   SDValue Chain = MemSD->getChain();
5571   SDValue BasePtr = MemSD->getBasePtr();
5572 
5573   SDValue Mask, PassThru, VL;
5574   if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
5575     Mask = VPLoad->getMask();
5576     PassThru = DAG.getUNDEF(VT);
5577     VL = VPLoad->getVectorLength();
5578   } else {
5579     const auto *MLoad = cast<MaskedLoadSDNode>(Op);
5580     Mask = MLoad->getMask();
5581     PassThru = MLoad->getPassThru();
5582   }
5583 
5584   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5585 
5586   MVT XLenVT = Subtarget.getXLenVT();
5587 
5588   MVT ContainerVT = VT;
5589   if (VT.isFixedLengthVector()) {
5590     ContainerVT = getContainerForFixedLengthVector(VT);
5591     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5592     if (!IsUnmasked) {
5593       MVT MaskVT =
5594           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5595       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5596     }
5597   }
5598 
5599   if (!VL)
5600     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5601 
5602   unsigned IntID =
5603       IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
5604   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5605   if (IsUnmasked)
5606     Ops.push_back(DAG.getUNDEF(ContainerVT));
5607   else
5608     Ops.push_back(PassThru);
5609   Ops.push_back(BasePtr);
5610   if (!IsUnmasked)
5611     Ops.push_back(Mask);
5612   Ops.push_back(VL);
5613   if (!IsUnmasked)
5614     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
5615 
5616   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5617 
5618   SDValue Result =
5619       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
5620   Chain = Result.getValue(1);
5621 
5622   if (VT.isFixedLengthVector())
5623     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
5624 
5625   return DAG.getMergeValues({Result, Chain}, DL);
5626 }
5627 
5628 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
5629                                               SelectionDAG &DAG) const {
5630   SDLoc DL(Op);
5631 
5632   const auto *MemSD = cast<MemSDNode>(Op);
5633   EVT MemVT = MemSD->getMemoryVT();
5634   MachineMemOperand *MMO = MemSD->getMemOperand();
5635   SDValue Chain = MemSD->getChain();
5636   SDValue BasePtr = MemSD->getBasePtr();
5637   SDValue Val, Mask, VL;
5638 
5639   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
5640     Val = VPStore->getValue();
5641     Mask = VPStore->getMask();
5642     VL = VPStore->getVectorLength();
5643   } else {
5644     const auto *MStore = cast<MaskedStoreSDNode>(Op);
5645     Val = MStore->getValue();
5646     Mask = MStore->getMask();
5647   }
5648 
5649   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5650 
5651   MVT VT = Val.getSimpleValueType();
5652   MVT XLenVT = Subtarget.getXLenVT();
5653 
5654   MVT ContainerVT = VT;
5655   if (VT.isFixedLengthVector()) {
5656     ContainerVT = getContainerForFixedLengthVector(VT);
5657 
5658     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5659     if (!IsUnmasked) {
5660       MVT MaskVT =
5661           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5662       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5663     }
5664   }
5665 
5666   if (!VL)
5667     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5668 
5669   unsigned IntID =
5670       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
5671   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5672   Ops.push_back(Val);
5673   Ops.push_back(BasePtr);
5674   if (!IsUnmasked)
5675     Ops.push_back(Mask);
5676   Ops.push_back(VL);
5677 
5678   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
5679                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
5680 }
5681 
5682 SDValue
5683 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
5684                                                       SelectionDAG &DAG) const {
5685   MVT InVT = Op.getOperand(0).getSimpleValueType();
5686   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
5687 
5688   MVT VT = Op.getSimpleValueType();
5689 
5690   SDValue Op1 =
5691       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
5692   SDValue Op2 =
5693       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5694 
5695   SDLoc DL(Op);
5696   SDValue VL =
5697       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5698 
5699   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5700   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5701 
5702   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
5703                             Op.getOperand(2), Mask, VL);
5704 
5705   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
5706 }
5707 
5708 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
5709     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
5710   MVT VT = Op.getSimpleValueType();
5711 
5712   if (VT.getVectorElementType() == MVT::i1)
5713     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
5714 
5715   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
5716 }
5717 
5718 SDValue
5719 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
5720                                                       SelectionDAG &DAG) const {
5721   unsigned Opc;
5722   switch (Op.getOpcode()) {
5723   default: llvm_unreachable("Unexpected opcode!");
5724   case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
5725   case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
5726   case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
5727   }
5728 
5729   return lowerToScalableOp(Op, DAG, Opc);
5730 }
5731 
5732 // Lower vector ABS to smax(X, sub(0, X)).
5733 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
5734   SDLoc DL(Op);
5735   MVT VT = Op.getSimpleValueType();
5736   SDValue X = Op.getOperand(0);
5737 
5738   assert(VT.isFixedLengthVector() && "Unexpected type");
5739 
5740   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5741   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5742 
5743   SDValue Mask, VL;
5744   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5745 
5746   SDValue SplatZero =
5747       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
5748                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
5749   SDValue NegX =
5750       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
5751   SDValue Max =
5752       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
5753 
5754   return convertFromScalableVector(VT, Max, DAG, Subtarget);
5755 }
5756 
5757 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
5758     SDValue Op, SelectionDAG &DAG) const {
5759   SDLoc DL(Op);
5760   MVT VT = Op.getSimpleValueType();
5761   SDValue Mag = Op.getOperand(0);
5762   SDValue Sign = Op.getOperand(1);
5763   assert(Mag.getValueType() == Sign.getValueType() &&
5764          "Can only handle COPYSIGN with matching types.");
5765 
5766   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5767   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
5768   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
5769 
5770   SDValue Mask, VL;
5771   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5772 
5773   SDValue CopySign =
5774       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
5775 
5776   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
5777 }
5778 
5779 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
5780     SDValue Op, SelectionDAG &DAG) const {
5781   MVT VT = Op.getSimpleValueType();
5782   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5783 
5784   MVT I1ContainerVT =
5785       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5786 
5787   SDValue CC =
5788       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
5789   SDValue Op1 =
5790       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5791   SDValue Op2 =
5792       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
5793 
5794   SDLoc DL(Op);
5795   SDValue Mask, VL;
5796   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5797 
5798   SDValue Select =
5799       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
5800 
5801   return convertFromScalableVector(VT, Select, DAG, Subtarget);
5802 }
5803 
5804 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
5805                                                unsigned NewOpc,
5806                                                bool HasMask) const {
5807   MVT VT = Op.getSimpleValueType();
5808   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5809 
5810   // Create list of operands by converting existing ones to scalable types.
5811   SmallVector<SDValue, 6> Ops;
5812   for (const SDValue &V : Op->op_values()) {
5813     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5814 
5815     // Pass through non-vector operands.
5816     if (!V.getValueType().isVector()) {
5817       Ops.push_back(V);
5818       continue;
5819     }
5820 
5821     // "cast" fixed length vector to a scalable vector.
5822     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
5823            "Only fixed length vectors are supported!");
5824     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5825   }
5826 
5827   SDLoc DL(Op);
5828   SDValue Mask, VL;
5829   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5830   if (HasMask)
5831     Ops.push_back(Mask);
5832   Ops.push_back(VL);
5833 
5834   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
5835   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
5836 }
5837 
5838 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
5839 // * Operands of each node are assumed to be in the same order.
5840 // * The EVL operand is promoted from i32 to i64 on RV64.
5841 // * Fixed-length vectors are converted to their scalable-vector container
5842 //   types.
5843 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
5844                                        unsigned RISCVISDOpc) const {
5845   SDLoc DL(Op);
5846   MVT VT = Op.getSimpleValueType();
5847   SmallVector<SDValue, 4> Ops;
5848 
5849   for (const auto &OpIdx : enumerate(Op->ops())) {
5850     SDValue V = OpIdx.value();
5851     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5852     // Pass through operands which aren't fixed-length vectors.
5853     if (!V.getValueType().isFixedLengthVector()) {
5854       Ops.push_back(V);
5855       continue;
5856     }
5857     // "cast" fixed length vector to a scalable vector.
5858     MVT OpVT = V.getSimpleValueType();
5859     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
5860     assert(useRVVForFixedLengthVectorVT(OpVT) &&
5861            "Only fixed length vectors are supported!");
5862     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5863   }
5864 
5865   if (!VT.isFixedLengthVector())
5866     return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
5867 
5868   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5869 
5870   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
5871 
5872   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
5873 }
5874 
5875 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
5876                                             unsigned MaskOpc,
5877                                             unsigned VecOpc) const {
5878   MVT VT = Op.getSimpleValueType();
5879   if (VT.getVectorElementType() != MVT::i1)
5880     return lowerVPOp(Op, DAG, VecOpc);
5881 
  // It is safe to drop the mask parameter as masked-off elements are undef.
5883   SDValue Op1 = Op->getOperand(0);
5884   SDValue Op2 = Op->getOperand(1);
5885   SDValue VL = Op->getOperand(3);
5886 
5887   MVT ContainerVT = VT;
5888   const bool IsFixed = VT.isFixedLengthVector();
5889   if (IsFixed) {
5890     ContainerVT = getContainerForFixedLengthVector(VT);
5891     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5892     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
5893   }
5894 
5895   SDLoc DL(Op);
5896   SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
5897   if (!IsFixed)
5898     return Val;
5899   return convertFromScalableVector(VT, Val, DAG, Subtarget);
5900 }
5901 
5902 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to an RVV indexed load. The RVV indexed load instructions only
5904 // support the "unsigned unscaled" addressing mode; indices are implicitly
5905 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
5906 // signed or scaled indexing is extended to the XLEN value type and scaled
5907 // accordingly.
5908 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
5909                                                SelectionDAG &DAG) const {
5910   SDLoc DL(Op);
5911   MVT VT = Op.getSimpleValueType();
5912 
5913   const auto *MemSD = cast<MemSDNode>(Op.getNode());
5914   EVT MemVT = MemSD->getMemoryVT();
5915   MachineMemOperand *MMO = MemSD->getMemOperand();
5916   SDValue Chain = MemSD->getChain();
5917   SDValue BasePtr = MemSD->getBasePtr();
5918 
5919   ISD::LoadExtType LoadExtType;
5920   SDValue Index, Mask, PassThru, VL;
5921 
5922   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
5923     Index = VPGN->getIndex();
5924     Mask = VPGN->getMask();
5925     PassThru = DAG.getUNDEF(VT);
5926     VL = VPGN->getVectorLength();
5927     // VP doesn't support extending loads.
5928     LoadExtType = ISD::NON_EXTLOAD;
5929   } else {
5930     // Else it must be a MGATHER.
5931     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
5932     Index = MGN->getIndex();
5933     Mask = MGN->getMask();
5934     PassThru = MGN->getPassThru();
5935     LoadExtType = MGN->getExtensionType();
5936   }
5937 
5938   MVT IndexVT = Index.getSimpleValueType();
5939   MVT XLenVT = Subtarget.getXLenVT();
5940 
5941   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
5942          "Unexpected VTs!");
5943   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt in to extending vector loads.
5945   assert(LoadExtType == ISD::NON_EXTLOAD &&
5946          "Unexpected extending MGATHER/VP_GATHER");
5947   (void)LoadExtType;
5948 
5949   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5950   // the selection of the masked intrinsics doesn't do this for us.
5951   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5952 
5953   MVT ContainerVT = VT;
5954   if (VT.isFixedLengthVector()) {
5955     // We need to use the larger of the result and index type to determine the
5956     // scalable type to use so we don't increase LMUL for any operand/result.
5957     if (VT.bitsGE(IndexVT)) {
5958       ContainerVT = getContainerForFixedLengthVector(VT);
5959       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
5960                                  ContainerVT.getVectorElementCount());
5961     } else {
5962       IndexVT = getContainerForFixedLengthVector(IndexVT);
5963       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
5964                                      IndexVT.getVectorElementCount());
5965     }
5966 
5967     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
5968 
5969     if (!IsUnmasked) {
5970       MVT MaskVT =
5971           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5972       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5973       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5974     }
5975   }
5976 
5977   if (!VL)
5978     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5979 
5980   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
5981     IndexVT = IndexVT.changeVectorElementType(XLenVT);
5982     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
5983                                    VL);
5984     Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
5985                         TrueMask, VL);
5986   }
5987 
5988   unsigned IntID =
5989       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
5990   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5991   if (IsUnmasked)
5992     Ops.push_back(DAG.getUNDEF(ContainerVT));
5993   else
5994     Ops.push_back(PassThru);
5995   Ops.push_back(BasePtr);
5996   Ops.push_back(Index);
5997   if (!IsUnmasked)
5998     Ops.push_back(Mask);
5999   Ops.push_back(VL);
6000   if (!IsUnmasked)
6001     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
6002 
6003   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6004   SDValue Result =
6005       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
6006   Chain = Result.getValue(1);
6007 
6008   if (VT.isFixedLengthVector())
6009     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6010 
6011   return DAG.getMergeValues({Result, Chain}, DL);
6012 }
6013 
6014 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
// matched to an RVV indexed store. The RVV indexed store instructions only
6016 // support the "unsigned unscaled" addressing mode; indices are implicitly
6017 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
6018 // signed or scaled indexing is extended to the XLEN value type and scaled
6019 // accordingly.
6020 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
6021                                                 SelectionDAG &DAG) const {
6022   SDLoc DL(Op);
6023   const auto *MemSD = cast<MemSDNode>(Op.getNode());
6024   EVT MemVT = MemSD->getMemoryVT();
6025   MachineMemOperand *MMO = MemSD->getMemOperand();
6026   SDValue Chain = MemSD->getChain();
6027   SDValue BasePtr = MemSD->getBasePtr();
6028 
6029   bool IsTruncatingStore = false;
6030   SDValue Index, Mask, Val, VL;
6031 
6032   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
6033     Index = VPSN->getIndex();
6034     Mask = VPSN->getMask();
6035     Val = VPSN->getValue();
6036     VL = VPSN->getVectorLength();
6037     // VP doesn't support truncating stores.
6038     IsTruncatingStore = false;
6039   } else {
6040     // Else it must be a MSCATTER.
6041     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
6042     Index = MSN->getIndex();
6043     Mask = MSN->getMask();
6044     Val = MSN->getValue();
6045     IsTruncatingStore = MSN->isTruncatingStore();
6046   }
6047 
6048   MVT VT = Val.getSimpleValueType();
6049   MVT IndexVT = Index.getSimpleValueType();
6050   MVT XLenVT = Subtarget.getXLenVT();
6051 
6052   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
6053          "Unexpected VTs!");
6054   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt in to extending vector loads and
  // truncating vector stores.
6057   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
6058   (void)IsTruncatingStore;
6059 
6060   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
6061   // the selection of the masked intrinsics doesn't do this for us.
6062   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6063 
6064   MVT ContainerVT = VT;
6065   if (VT.isFixedLengthVector()) {
6066     // We need to use the larger of the value and index type to determine the
6067     // scalable type to use so we don't increase LMUL for any operand/result.
6068     if (VT.bitsGE(IndexVT)) {
6069       ContainerVT = getContainerForFixedLengthVector(VT);
6070       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
6071                                  ContainerVT.getVectorElementCount());
6072     } else {
6073       IndexVT = getContainerForFixedLengthVector(IndexVT);
6074       ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
6075                                      IndexVT.getVectorElementCount());
6076     }
6077 
6078     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
6079     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
6080 
6081     if (!IsUnmasked) {
6082       MVT MaskVT =
6083           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6084       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6085     }
6086   }
6087 
6088   if (!VL)
6089     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6090 
6091   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
6092     IndexVT = IndexVT.changeVectorElementType(XLenVT);
6093     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
6094                                    VL);
6095     Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
6096                         TrueMask, VL);
6097   }
6098 
6099   unsigned IntID =
6100       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
6101   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
6102   Ops.push_back(Val);
6103   Ops.push_back(BasePtr);
6104   Ops.push_back(Index);
6105   if (!IsUnmasked)
6106     Ops.push_back(Mask);
6107   Ops.push_back(VL);
6108 
6109   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
6110                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
6111 }
6112 
6113 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
6114                                                SelectionDAG &DAG) const {
6115   const MVT XLenVT = Subtarget.getXLenVT();
6116   SDLoc DL(Op);
6117   SDValue Chain = Op->getOperand(0);
6118   SDValue SysRegNo = DAG.getTargetConstant(
6119       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6120   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
6121   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
6122 
  // The rounding mode encoding used by RISCV differs from the one used by
  // FLT_ROUNDS. To convert between them, the RISCV rounding mode is used as an
  // index into a table consisting of a sequence of 4-bit fields, each holding
  // the corresponding FLT_ROUNDS mode.
6127   static const int Table =
6128       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
6129       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
6130       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
6131       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
6132       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
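  // For example, the 4-bit field for RTZ (1) sits at bit position 4 and holds
  // int(RoundingMode::TowardZero), the FLT_ROUNDS value for round-toward-zero.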
6133 
6134   SDValue Shift =
6135       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
6136   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6137                                 DAG.getConstant(Table, DL, XLenVT), Shift);
6138   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6139                                DAG.getConstant(7, DL, XLenVT));
6140 
6141   return DAG.getMergeValues({Masked, Chain}, DL);
6142 }
6143 
6144 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
6145                                                SelectionDAG &DAG) const {
6146   const MVT XLenVT = Subtarget.getXLenVT();
6147   SDLoc DL(Op);
6148   SDValue Chain = Op->getOperand(0);
6149   SDValue RMValue = Op->getOperand(1);
6150   SDValue SysRegNo = DAG.getTargetConstant(
6151       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6152 
  // The rounding mode encoding used by RISCV differs from the one used by
  // FLT_ROUNDS. To convert between them, the C rounding mode is used as an
  // index into a table consisting of a sequence of 4-bit fields, each holding
  // the corresponding RISCV mode.
6157   static const unsigned Table =
6158       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
6159       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
6160       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
6161       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
6162       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
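  // For example, an FLT_ROUNDS value of 1 (NearestTiesToEven) selects the
  // 4-bit field at bit position 4, which holds RISCVFPRndMode::RNE.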
6163 
6164   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
6165                               DAG.getConstant(2, DL, XLenVT));
6166   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6167                                 DAG.getConstant(Table, DL, XLenVT), Shift);
6168   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6169                         DAG.getConstant(0x7, DL, XLenVT));
6170   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
6171                      RMValue);
6172 }
6173 
6174 static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
6175   switch (IntNo) {
6176   default:
6177     llvm_unreachable("Unexpected Intrinsic");
6178   case Intrinsic::riscv_grev:
6179     return RISCVISD::GREVW;
6180   case Intrinsic::riscv_gorc:
6181     return RISCVISD::GORCW;
6182   case Intrinsic::riscv_bcompress:
6183     return RISCVISD::BCOMPRESSW;
6184   case Intrinsic::riscv_bdecompress:
6185     return RISCVISD::BDECOMPRESSW;
6186   case Intrinsic::riscv_bfp:
6187     return RISCVISD::BFPW;
6188   case Intrinsic::riscv_fsl:
6189     return RISCVISD::FSLW;
6190   case Intrinsic::riscv_fsr:
6191     return RISCVISD::FSRW;
6192   }
6193 }
6194 
// Converts the given intrinsic to an i64 operation with any-extended operands.
6196 static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
6197                                          unsigned IntNo) {
6198   SDLoc DL(N);
6199   RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
6200   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6201   SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6202   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
6203   // ReplaceNodeResults requires we maintain the same type for the return value.
6204   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6205 }
6206 
6207 // Returns the opcode of the target-specific SDNode that implements the 32-bit
6208 // form of the given Opcode.
6209 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
6210   switch (Opcode) {
6211   default:
6212     llvm_unreachable("Unexpected opcode");
6213   case ISD::SHL:
6214     return RISCVISD::SLLW;
6215   case ISD::SRA:
6216     return RISCVISD::SRAW;
6217   case ISD::SRL:
6218     return RISCVISD::SRLW;
6219   case ISD::SDIV:
6220     return RISCVISD::DIVW;
6221   case ISD::UDIV:
6222     return RISCVISD::DIVUW;
6223   case ISD::UREM:
6224     return RISCVISD::REMUW;
6225   case ISD::ROTL:
6226     return RISCVISD::ROLW;
6227   case ISD::ROTR:
6228     return RISCVISD::RORW;
6229   case RISCVISD::GREV:
6230     return RISCVISD::GREVW;
6231   case RISCVISD::GORC:
6232     return RISCVISD::GORCW;
6233   }
6234 }
6235 
6236 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
6237 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
6238 // otherwise be promoted to i64, making it difficult to select the
6239 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally
6240 // of type i8/i16/i32 is lost.
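     // For example, an i32 ISD::SHL on RV64 becomes
     //   (i32 (trunc (RISCVISD::SLLW (anyext x), (anyext y))))
     // so the W-form instruction can still be selected after type legalization.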
6241 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
6242                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
6243   SDLoc DL(N);
6244   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6245   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
6246   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
6247   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6248   // ReplaceNodeResults requires we maintain the same type for the return value.
6249   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6250 }
6251 
6252 // Converts the given 32-bit operation to an i64 operation with sign extension
6253 // semantics to reduce the number of sign extension instructions.
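     // For example, an i32 ISD::ADD becomes
     //   (i32 (trunc (sext_inreg (add (anyext x), (anyext y)), i32)))
     // where the sext_inreg lets the add be selected as ADDW, whose result is
     // already sign extended to 64 bits.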
6254 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
6255   SDLoc DL(N);
6256   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6257   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6258   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
6259   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6260                                DAG.getValueType(MVT::i32));
6261   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
6262 }
6263 
6264 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
6265                                              SmallVectorImpl<SDValue> &Results,
6266                                              SelectionDAG &DAG) const {
6267   SDLoc DL(N);
6268   switch (N->getOpcode()) {
6269   default:
6270     llvm_unreachable("Don't know how to custom type legalize this operation!");
6271   case ISD::STRICT_FP_TO_SINT:
6272   case ISD::STRICT_FP_TO_UINT:
6273   case ISD::FP_TO_SINT:
6274   case ISD::FP_TO_UINT: {
6275     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6276            "Unexpected custom legalisation");
6277     bool IsStrict = N->isStrictFPOpcode();
6278     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
6279                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
6280     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
6281     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
6282         TargetLowering::TypeSoftenFloat) {
6283       if (!isTypeLegal(Op0.getValueType()))
6284         return;
6285       if (IsStrict) {
6286         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
6287                                 : RISCVISD::STRICT_FCVT_WU_RV64;
6288         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
6289         SDValue Res = DAG.getNode(
6290             Opc, DL, VTs, N->getOperand(0), Op0,
6291             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6292         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6293         Results.push_back(Res.getValue(1));
6294         return;
6295       }
6296       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
6297       SDValue Res =
6298           DAG.getNode(Opc, DL, MVT::i64, Op0,
6299                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6300       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6301       return;
6302     }
6303     // If the FP type needs to be softened, emit a library call using the 'si'
6304     // version. If we left it to default legalization we'd end up with 'di'. If
6305     // the FP type doesn't need to be softened just let generic type
6306     // legalization promote the result type.
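         // For example (assuming the usual compiler-rt names), an fp_to_sint
         // from f64 to i32 on a target without the D extension becomes a call
         // to __fixdfsi rather than __fixdfdi.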
6307     RTLIB::Libcall LC;
6308     if (IsSigned)
6309       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
6310     else
6311       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
6312     MakeLibCallOptions CallOptions;
6313     EVT OpVT = Op0.getValueType();
6314     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
6315     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
6316     SDValue Result;
6317     std::tie(Result, Chain) =
6318         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
6319     Results.push_back(Result);
6320     if (IsStrict)
6321       Results.push_back(Chain);
6322     break;
6323   }
6324   case ISD::READCYCLECOUNTER: {
6325     assert(!Subtarget.is64Bit() &&
6326            "READCYCLECOUNTER only has custom type legalization on riscv32");
6327 
6328     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
6329     SDValue RCW =
6330         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
6331 
6332     Results.push_back(
6333         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
6334     Results.push_back(RCW.getValue(2));
6335     break;
6336   }
6337   case ISD::MUL: {
6338     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
6339     unsigned XLen = Subtarget.getXLen();
6340     // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
6341     if (Size > XLen) {
6342       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
6343       SDValue LHS = N->getOperand(0);
6344       SDValue RHS = N->getOperand(1);
6345       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
6346 
6347       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
6348       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
6349       // We need exactly one side to be unsigned.
6350       if (LHSIsU == RHSIsU)
6351         return;
6352 
6353       auto MakeMULPair = [&](SDValue S, SDValue U) {
6354         MVT XLenVT = Subtarget.getXLenVT();
6355         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
6356         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
6357         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
6358         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
6359         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
6360       };
6361 
6362       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
6363       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
6364 
6365       // The other operand should be signed, but still prefer MULH when
6366       // possible.
6367       if (RHSIsU && LHSIsS && !RHSIsS)
6368         Results.push_back(MakeMULPair(LHS, RHS));
6369       else if (LHSIsU && RHSIsS && !LHSIsS)
6370         Results.push_back(MakeMULPair(RHS, LHS));
6371 
6372       return;
6373     }
6374     LLVM_FALLTHROUGH;
6375   }
6376   case ISD::ADD:
6377   case ISD::SUB:
6378     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6379            "Unexpected custom legalisation");
6380     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
6381     break;
6382   case ISD::SHL:
6383   case ISD::SRA:
6384   case ISD::SRL:
6385     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6386            "Unexpected custom legalisation");
6387     if (N->getOperand(1).getOpcode() != ISD::Constant) {
6388       Results.push_back(customLegalizeToWOp(N, DAG));
6389       break;
6390     }
6391 
6392     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
6393     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
6394     // shift amount.
6395     if (N->getOpcode() == ISD::SHL) {
6396       SDLoc DL(N);
6397       SDValue NewOp0 =
6398           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6399       SDValue NewOp1 =
6400           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
6401       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
6402       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6403                                    DAG.getValueType(MVT::i32));
6404       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6405     }
6406 
6407     break;
6408   case ISD::ROTL:
6409   case ISD::ROTR:
6410     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6411            "Unexpected custom legalisation");
6412     Results.push_back(customLegalizeToWOp(N, DAG));
6413     break;
6414   case ISD::CTTZ:
6415   case ISD::CTTZ_ZERO_UNDEF:
6416   case ISD::CTLZ:
6417   case ISD::CTLZ_ZERO_UNDEF: {
6418     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6419            "Unexpected custom legalisation");
6420 
6421     SDValue NewOp0 =
6422         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6423     bool IsCTZ =
6424         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
6425     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
6426     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
6427     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6428     return;
6429   }
6430   case ISD::SDIV:
6431   case ISD::UDIV:
6432   case ISD::UREM: {
6433     MVT VT = N->getSimpleValueType(0);
6434     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
6435            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
6436            "Unexpected custom legalisation");
6437     // Don't promote division/remainder by a constant since we should expand
6438     // those to a multiply by a magic constant instead.
6439     // FIXME: What if the expansion is disabled for minsize?
6440     if (N->getOperand(1).getOpcode() == ISD::Constant)
6441       return;
6442 
6443     // If the input is i32, use ANY_EXTEND since the W instructions don't read
6444     // the upper 32 bits. For other types we need to sign or zero extend
6445     // based on the opcode.
6446     unsigned ExtOpc = ISD::ANY_EXTEND;
6447     if (VT != MVT::i32)
6448       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
6449                                            : ISD::ZERO_EXTEND;
6450 
6451     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
6452     break;
6453   }
6454   case ISD::UADDO:
6455   case ISD::USUBO: {
6456     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6457            "Unexpected custom legalisation");
6458     bool IsAdd = N->getOpcode() == ISD::UADDO;
6459     // Create an ADDW or SUBW.
6460     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6461     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6462     SDValue Res =
6463         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
6464     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
6465                       DAG.getValueType(MVT::i32));
6466 
6467     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
6468     // Since the inputs are sign extended from i32, this is equivalent to
6469     // comparing the lower 32 bits.
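         // Worked example for UADDO: with i32 inputs 0xffffffff and 1, Res is
         // the sign-extended value 0, while the sign-extended LHS is -1, so
         // Res <u LHS and overflow is reported.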
6470     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6471     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
6472                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
6473 
6474     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6475     Results.push_back(Overflow);
6476     return;
6477   }
6478   case ISD::UADDSAT:
6479   case ISD::USUBSAT: {
6480     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6481            "Unexpected custom legalisation");
6482     if (Subtarget.hasStdExtZbb()) {
6483       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
6484       // sign extension allows overflow of the lower 32 bits to be detected in
6485       // the promoted i64 type.
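           // e.g. for i32 uaddsat of 0xffffffff and 1, sign extension gives
           // i64 0xffffffffffffffff + 1, which saturates in i64 exactly when
           // the i32 operation would; a zero extend would not overflow i64.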
6486       SDValue LHS =
6487           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6488       SDValue RHS =
6489           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
6490       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
6491       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6492       return;
6493     }
6494 
6495     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
6496     // promotion for UADDO/USUBO.
6497     Results.push_back(expandAddSubSat(N, DAG));
6498     return;
6499   }
6500   case ISD::BITCAST: {
6501     EVT VT = N->getValueType(0);
6502     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
6503     SDValue Op0 = N->getOperand(0);
6504     EVT Op0VT = Op0.getValueType();
6505     MVT XLenVT = Subtarget.getXLenVT();
6506     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
6507       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
6508       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
6509     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
6510                Subtarget.hasStdExtF()) {
6511       SDValue FPConv =
6512           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
6513       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
6514     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
6515                isTypeLegal(Op0VT)) {
6516       // Custom-legalize bitcasts from fixed-length vector types to illegal
6517       // scalar types in order to improve codegen. Bitcast the vector to a
6518       // one-element vector type whose element type is the same as the result
6519       // type, and extract the first element.
6520       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6521       if (isTypeLegal(BVT)) {
6522         SDValue BVec = DAG.getBitcast(BVT, Op0);
6523         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6524                                       DAG.getConstant(0, DL, XLenVT)));
6525       }
6526     }
6527     break;
6528   }
6529   case RISCVISD::GREV:
6530   case RISCVISD::GORC: {
6531     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6532            "Unexpected custom legalisation");
6533     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6534     // This is similar to customLegalizeToWOp: any-extend both operands (the
6535     // second operand is a constant control value) to i64, emit the W-form node
6536     // and truncate the result back to i32.
6537     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6538     SDValue NewOp0 =
6539         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6540     SDValue NewOp1 =
6541         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6542     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6543     // ReplaceNodeResults requires we maintain the same type for the return
6544     // value.
6545     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6546     break;
6547   }
6548   case RISCVISD::SHFL: {
6549     // There is no SHFLIW instruction, but we can just promote the operation.
6550     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6551            "Unexpected custom legalisation");
6552     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6553     SDValue NewOp0 =
6554         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6555     SDValue NewOp1 =
6556         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6557     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
6558     // ReplaceNodeResults requires we maintain the same type for the return
6559     // value.
6560     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6561     break;
6562   }
6563   case ISD::BSWAP:
6564   case ISD::BITREVERSE: {
6565     MVT VT = N->getSimpleValueType(0);
6566     MVT XLenVT = Subtarget.getXLenVT();
6567     assert((VT == MVT::i8 || VT == MVT::i16 ||
6568             (VT == MVT::i32 && Subtarget.is64Bit())) &&
6569            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
6570     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
6571     unsigned Imm = VT.getSizeInBits() - 1;
6572     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
6573     if (N->getOpcode() == ISD::BSWAP)
6574       Imm &= ~0x7U;
6575     unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
6576     SDValue GREVI =
6577         DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
6578     // ReplaceNodeResults requires we maintain the same type for the return
6579     // value.
6580     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
6581     break;
6582   }
6583   case ISD::FSHL:
6584   case ISD::FSHR: {
6585     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6586            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
6587     SDValue NewOp0 =
6588         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6589     SDValue NewOp1 =
6590         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6591     SDValue NewShAmt =
6592         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6593     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
6594     // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
6595     NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
6596                            DAG.getConstant(0x1f, DL, MVT::i64));
6597     // fshl and fshr concatenate their operands in the same order; the fsrw and
6598     // fslw instructions use different orders. fshl returns its first operand for
6599     // a shift of zero, while fshr returns its second operand. fsl and fsr both
6600     // return rs1, so the ISD nodes need to have different operand orders.
6601     // The shift amount is in rs2.
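         // e.g. (i32 (fshr x, y, z)) becomes
         //   (i32 (trunc (FSRW (anyext y), (anyext x), (and (anyext z), 31))))
         // with the first two operands swapped relative to fshr.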
6602     unsigned Opc = RISCVISD::FSLW;
6603     if (N->getOpcode() == ISD::FSHR) {
6604       std::swap(NewOp0, NewOp1);
6605       Opc = RISCVISD::FSRW;
6606     }
6607     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
6608     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
6609     break;
6610   }
6611   case ISD::EXTRACT_VECTOR_ELT: {
6612     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
6613     // type is illegal (currently only vXi64 RV32).
6614     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
6615     // transferred to the destination register. We issue two of these from the
6616     // upper- and lower- halves of the SEW-bit vector element, slid down to the
6617     // first element.
6618     SDValue Vec = N->getOperand(0);
6619     SDValue Idx = N->getOperand(1);
6620 
6621     // The vector type hasn't been legalized yet so we can't issue target
6622     // specific nodes if it needs legalization.
6623     // FIXME: We could manually legalize this if it turns out to be important.
6624     if (!isTypeLegal(Vec.getValueType()))
6625       return;
6626 
6627     MVT VecVT = Vec.getSimpleValueType();
6628 
6629     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
6630            VecVT.getVectorElementType() == MVT::i64 &&
6631            "Unexpected EXTRACT_VECTOR_ELT legalization");
6632 
6633     // If this is a fixed vector, we need to convert it to a scalable vector.
6634     MVT ContainerVT = VecVT;
6635     if (VecVT.isFixedLengthVector()) {
6636       ContainerVT = getContainerForFixedLengthVector(VecVT);
6637       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6638     }
6639 
6640     MVT XLenVT = Subtarget.getXLenVT();
6641 
6642     // Use a VL of 1 to avoid processing more elements than we need.
6643     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6644     SDValue VL = DAG.getConstant(1, DL, XLenVT);
6645     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6646 
6647     // Unless the index is known to be 0, we must slide the vector down to get
6648     // the desired element into index 0.
6649     if (!isNullConstant(Idx)) {
6650       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
6651                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
6652     }
6653 
6654     // Extract the lower XLEN bits of the correct vector element.
6655     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6656 
6657     // To extract the upper XLEN bits of the vector element, shift the first
6658     // element right by 32 bits and re-extract the lower XLEN bits.
6659     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6660                                      DAG.getConstant(32, DL, XLenVT), VL);
6661     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
6662                                  ThirtyTwoV, Mask, VL);
6663 
6664     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6665 
6666     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6667     break;
6668   }
6669   case ISD::INTRINSIC_WO_CHAIN: {
6670     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6671     switch (IntNo) {
6672     default:
6673       llvm_unreachable(
6674           "Don't know how to custom type legalize this intrinsic!");
6675     case Intrinsic::riscv_grev:
6676     case Intrinsic::riscv_gorc:
6677     case Intrinsic::riscv_bcompress:
6678     case Intrinsic::riscv_bdecompress:
6679     case Intrinsic::riscv_bfp: {
6680       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6681              "Unexpected custom legalisation");
6682       Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
6683       break;
6684     }
6685     case Intrinsic::riscv_fsl:
6686     case Intrinsic::riscv_fsr: {
6687       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6688              "Unexpected custom legalisation");
6689       SDValue NewOp1 =
6690           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6691       SDValue NewOp2 =
6692           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6693       SDValue NewOp3 =
6694           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
6695       unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
6696       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
6697       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6698       break;
6699     }
6700     case Intrinsic::riscv_orc_b: {
6701       // Lower to the GORCI encoding for orc.b with the operand extended.
6702       SDValue NewOp =
6703           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6704       // If Zbp is enabled, use GORCIW which will sign extend the result.
6705       unsigned Opc =
6706           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
6707       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
6708                                 DAG.getConstant(7, DL, MVT::i64));
6709       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6710       return;
6711     }
6712     case Intrinsic::riscv_shfl:
6713     case Intrinsic::riscv_unshfl: {
6714       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6715              "Unexpected custom legalisation");
6716       SDValue NewOp1 =
6717           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6718       SDValue NewOp2 =
6719           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6720       unsigned Opc =
6721           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
6722       // There is no (UN)SHFLIW. If the control word is a constant, we can use
6723       // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half
6724       // will be shuffled the same way as the lower 32 bit half, but the two
6725       // halves won't cross.
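           // e.g. a constant control of 0x1b becomes (SHFL (anyext x), 0xb);
           // clearing bit 4 keeps the shuffle within each 32-bit half.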
6726       if (isa<ConstantSDNode>(NewOp2)) {
6727         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
6728                              DAG.getConstant(0xf, DL, MVT::i64));
6729         Opc =
6730             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
6731       }
6732       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
6733       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6734       break;
6735     }
6736     case Intrinsic::riscv_vmv_x_s: {
6737       EVT VT = N->getValueType(0);
6738       MVT XLenVT = Subtarget.getXLenVT();
6739       if (VT.bitsLT(XLenVT)) {
6740         // Simple case: just extract using vmv.x.s and truncate.
6741         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
6742                                       Subtarget.getXLenVT(), N->getOperand(1));
6743         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
6744         return;
6745       }
6746 
6747       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
6748              "Unexpected custom legalization");
6749 
6750       // We need to do the move in two steps.
6751       SDValue Vec = N->getOperand(1);
6752       MVT VecVT = Vec.getSimpleValueType();
6753 
6754       // First extract the lower XLEN bits of the element.
6755       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6756 
6757       // To extract the upper XLEN bits of the vector element, shift the first
6758       // element right by 32 bits and re-extract the lower XLEN bits.
6759       SDValue VL = DAG.getConstant(1, DL, XLenVT);
6760       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
6761       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6762       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
6763                                        DAG.getConstant(32, DL, XLenVT), VL);
6764       SDValue LShr32 =
6765           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
6766       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6767 
6768       Results.push_back(
6769           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6770       break;
6771     }
6772     }
6773     break;
6774   }
6775   case ISD::VECREDUCE_ADD:
6776   case ISD::VECREDUCE_AND:
6777   case ISD::VECREDUCE_OR:
6778   case ISD::VECREDUCE_XOR:
6779   case ISD::VECREDUCE_SMAX:
6780   case ISD::VECREDUCE_UMAX:
6781   case ISD::VECREDUCE_SMIN:
6782   case ISD::VECREDUCE_UMIN:
6783     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
6784       Results.push_back(V);
6785     break;
6786   case ISD::VP_REDUCE_ADD:
6787   case ISD::VP_REDUCE_AND:
6788   case ISD::VP_REDUCE_OR:
6789   case ISD::VP_REDUCE_XOR:
6790   case ISD::VP_REDUCE_SMAX:
6791   case ISD::VP_REDUCE_UMAX:
6792   case ISD::VP_REDUCE_SMIN:
6793   case ISD::VP_REDUCE_UMIN:
6794     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
6795       Results.push_back(V);
6796     break;
6797   case ISD::FLT_ROUNDS_: {
6798     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
6799     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
6800     Results.push_back(Res.getValue(0));
6801     Results.push_back(Res.getValue(1));
6802     break;
6803   }
6804   }
6805 }
6806 
6807 // A structure to hold one of the bit-manipulation patterns below. Together, a
6808 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
6809 //   (or (and (shl x, 1), 0xAAAAAAAA),
6810 //       (and (srl x, 1), 0x55555555))
6811 struct RISCVBitmanipPat {
6812   SDValue Op;
6813   unsigned ShAmt;
6814   bool IsSHL;
6815 
6816   bool formsPairWith(const RISCVBitmanipPat &Other) const {
6817     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
6818   }
6819 };
6820 
6821 // Matches patterns of the form
6822 //   (and (shl x, C2), (C1 << C2))
6823 //   (and (srl x, C2), C1)
6824 //   (shl (and x, C1), C2)
6825 //   (srl (and x, (C1 << C2)), C2)
6826 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
6827 // The expected masks for each shift amount are specified in BitmanipMasks,
6828 // where BitmanipMasks[log2(C2)] specifies the expected C1 value.
6829 // The maximum allowed shift amount is either XLen/2 or XLen/4, determined by
6830 // whether BitmanipMasks contains 6 or 5 entries, assuming that the maximum
6831 // possible XLen is 64.
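     // For example, with the GREVI masks used by matchGREVIPat below,
     // (and (srl x, 4), 0x0f0f0f0f) on a 32-bit value matches with ShAmt == 4
     // and IsSHL == false, since BitmanipMasks[log2(4)] truncated to 32 bits is
     // 0x0f0f0f0f.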
6832 static Optional<RISCVBitmanipPat>
6833 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
6834   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
6835          "Unexpected number of masks");
6836   Optional<uint64_t> Mask;
6837   // Optionally consume a mask around the shift operation.
6838   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
6839     Mask = Op.getConstantOperandVal(1);
6840     Op = Op.getOperand(0);
6841   }
6842   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
6843     return None;
6844   bool IsSHL = Op.getOpcode() == ISD::SHL;
6845 
6846   if (!isa<ConstantSDNode>(Op.getOperand(1)))
6847     return None;
6848   uint64_t ShAmt = Op.getConstantOperandVal(1);
6849 
6850   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6851   if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
6852     return None;
6853   // If we don't have enough masks for 64 bit, then we must be trying to
6854   // match SHFL so we're only allowed to shift 1/4 of the width.
6855   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
6856     return None;
6857 
6858   SDValue Src = Op.getOperand(0);
6859 
6860   // The expected mask is shifted left when the AND is found around SHL
6861   // patterns.
6862   //   ((x >> 1) & 0x55555555)
6863   //   ((x << 1) & 0xAAAAAAAA)
6864   bool SHLExpMask = IsSHL;
6865 
6866   if (!Mask) {
6867     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
6868     // the mask is all ones: consume that now.
6869     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
6870       Mask = Src.getConstantOperandVal(1);
6871       Src = Src.getOperand(0);
6872       // The expected mask is now in fact shifted left for SRL, so reverse the
6873       // decision.
6874       //   ((x & 0xAAAAAAAA) >> 1)
6875       //   ((x & 0x55555555) << 1)
6876       SHLExpMask = !SHLExpMask;
6877     } else {
6878       // Use a default shifted mask of all-ones if there's no AND, truncated
6879       // down to the expected width. This simplifies the logic later on.
6880       Mask = maskTrailingOnes<uint64_t>(Width);
6881       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
6882     }
6883   }
6884 
6885   unsigned MaskIdx = Log2_32(ShAmt);
6886   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6887 
6888   if (SHLExpMask)
6889     ExpMask <<= ShAmt;
6890 
6891   if (Mask != ExpMask)
6892     return None;
6893 
6894   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
6895 }
6896 
6897 // Matches any of the following bit-manipulation patterns:
6898 //   (and (shl x, 1), (0x55555555 << 1))
6899 //   (and (srl x, 1), 0x55555555)
6900 //   (shl (and x, 0x55555555), 1)
6901 //   (srl (and x, (0x55555555 << 1)), 1)
6902 // where the shift amount and mask may vary thus:
6903 //   [1]  = 0x55555555 / 0xAAAAAAAA
6904 //   [2]  = 0x33333333 / 0xCCCCCCCC
6905 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
6906 //   [8]  = 0x00FF00FF / 0xFF00FF00
6907 //   [16] = 0x0000FFFF / 0xFFFF0000
6908 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
6909 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
6910   // These are the unshifted masks which we use to match bit-manipulation
6911   // patterns. They may be shifted left in certain circumstances.
6912   static const uint64_t BitmanipMasks[] = {
6913       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
6914       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
6915 
6916   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6917 }
6918 
6919 // Match the following pattern as a GREVI(W) operation
6920 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
6921 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
6922                                const RISCVSubtarget &Subtarget) {
6923   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
6924   EVT VT = Op.getValueType();
6925 
6926   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6927     auto LHS = matchGREVIPat(Op.getOperand(0));
6928     auto RHS = matchGREVIPat(Op.getOperand(1));
6929     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
6930       SDLoc DL(Op);
6931       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
6932                          DAG.getConstant(LHS->ShAmt, DL, VT));
6933     }
6934   }
6935   return SDValue();
6936 }
6937 
6938 // Matches any of the following patterns as a GORCI(W) operation
6939 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
6940 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
6941 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
6942 // Note that with the variant of 3.,
6943 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
6944 // the inner pattern will first be matched as GREVI and then the outer
6945 // pattern will be matched to GORC via the first rule above.
6946 // 4.  (or (rotl/rotr x, bitwidth/2), x)
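     // For example, rule 4 turns (or (rotl x, 16), x) on an i32 value into
     // (GORC x, 16): rotating by half the bit width ORs each bit with its
     // partner in the other half.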
6947 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
6948                                const RISCVSubtarget &Subtarget) {
6949   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
6950   EVT VT = Op.getValueType();
6951 
6952   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6953     SDLoc DL(Op);
6954     SDValue Op0 = Op.getOperand(0);
6955     SDValue Op1 = Op.getOperand(1);
6956 
6957     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
6958       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
6959           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
6960           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
6961         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
6962       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
6963       if ((Reverse.getOpcode() == ISD::ROTL ||
6964            Reverse.getOpcode() == ISD::ROTR) &&
6965           Reverse.getOperand(0) == X &&
6966           isa<ConstantSDNode>(Reverse.getOperand(1))) {
6967         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
6968         if (RotAmt == (VT.getSizeInBits() / 2))
6969           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
6970                              DAG.getConstant(RotAmt, DL, VT));
6971       }
6972       return SDValue();
6973     };
6974 
6975     // Check for either commutable permutation of (or (GREVI x, shamt), x)
6976     if (SDValue V = MatchOROfReverse(Op0, Op1))
6977       return V;
6978     if (SDValue V = MatchOROfReverse(Op1, Op0))
6979       return V;
6980 
6981     // OR is commutable so canonicalize its OR operand to the left
6982     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
6983       std::swap(Op0, Op1);
6984     if (Op0.getOpcode() != ISD::OR)
6985       return SDValue();
6986     SDValue OrOp0 = Op0.getOperand(0);
6987     SDValue OrOp1 = Op0.getOperand(1);
6988     auto LHS = matchGREVIPat(OrOp0);
6989     // OR is commutable so swap the operands and try again: x might have been
6990     // on the left
6991     if (!LHS) {
6992       std::swap(OrOp0, OrOp1);
6993       LHS = matchGREVIPat(OrOp0);
6994     }
6995     auto RHS = matchGREVIPat(Op1);
6996     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
6997       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
6998                          DAG.getConstant(LHS->ShAmt, DL, VT));
6999     }
7000   }
7001   return SDValue();
7002 }
7003 
7004 // Matches any of the following bit-manipulation patterns:
7005 //   (and (shl x, 1), (0x22222222 << 1))
7006 //   (and (srl x, 1), 0x22222222)
7007 //   (shl (and x, 0x22222222), 1)
7008 //   (srl (and x, (0x22222222 << 1)), 1)
7009 // where the shift amount and mask may vary thus:
7010 //   [1]  = 0x22222222 / 0x44444444
7011 //   [2]  = 0x0C0C0C0C / 0x30303030
7012 //   [4]  = 0x00F000F0 / 0x0F000F00
7013 //   [8]  = 0x0000FF00 / 0x00FF0000
7014 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
7015 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
7016   // These are the unshifted masks which we use to match bit-manipulation
7017   // patterns. They may be shifted left in certain circumstances.
7018   static const uint64_t BitmanipMasks[] = {
7019       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
7020       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
7021 
7022   return matchRISCVBitmanipPat(Op, BitmanipMasks);
7023 }
7024 
7025 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
7026 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
7027                                const RISCVSubtarget &Subtarget) {
7028   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
7029   EVT VT = Op.getValueType();
7030 
7031   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
7032     return SDValue();
7033 
7034   SDValue Op0 = Op.getOperand(0);
7035   SDValue Op1 = Op.getOperand(1);
7036 
7037   // Or is commutable so canonicalize the second OR to the LHS.
7038   if (Op0.getOpcode() != ISD::OR)
7039     std::swap(Op0, Op1);
7040   if (Op0.getOpcode() != ISD::OR)
7041     return SDValue();
7042 
7043   // We found an inner OR, so our operands are the operands of the inner OR
7044   // and the other operand of the outer OR.
7045   SDValue A = Op0.getOperand(0);
7046   SDValue B = Op0.getOperand(1);
7047   SDValue C = Op1;
7048 
7049   auto Match1 = matchSHFLPat(A);
7050   auto Match2 = matchSHFLPat(B);
7051 
7052   // If neither matched, we failed.
7053   if (!Match1 && !Match2)
7054     return SDValue();
7055 
7056   // We had at least one match. If one failed, try the remaining C operand.
7057   if (!Match1) {
7058     std::swap(A, C);
7059     Match1 = matchSHFLPat(A);
7060     if (!Match1)
7061       return SDValue();
7062   } else if (!Match2) {
7063     std::swap(B, C);
7064     Match2 = matchSHFLPat(B);
7065     if (!Match2)
7066       return SDValue();
7067   }
7068   assert(Match1 && Match2);
7069 
7070   // Make sure our matches pair up.
7071   if (!Match1->formsPairWith(*Match2))
7072     return SDValue();
7073 
7074   // All that remains is to make sure C is an AND with the same input, one that
7075   // masks out the bits that are being shuffled.
7076   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
7077       C.getOperand(0) != Match1->Op)
7078     return SDValue();
7079 
7080   uint64_t Mask = C.getConstantOperandVal(1);
7081 
7082   static const uint64_t BitmanipMasks[] = {
7083       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
7084       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
7085   };
7086 
7087   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
7088   unsigned MaskIdx = Log2_32(Match1->ShAmt);
7089   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
7090 
7091   if (Mask != ExpMask)
7092     return SDValue();
7093 
7094   SDLoc DL(Op);
7095   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
7096                      DAG.getConstant(Match1->ShAmt, DL, VT));
7097 }
7098 
7099 // Optimize (add (shl x, c0), (shl y, c1)) ->
7100 //          (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2 or 3.
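     // For example, (add (shl x, 5), (shl y, 8)) has c1-c0 == 3, so it becomes
     // (shl (add (shl y, 3), x), 5), which selects as SH3ADD y, x followed by
     // an SLLI by 5.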
7101 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
7102                                   const RISCVSubtarget &Subtarget) {
7103   // Perform this optimization only when the Zba extension is enabled.
7104   if (!Subtarget.hasStdExtZba())
7105     return SDValue();
7106 
7107   // Skip for vector types and larger types.
7108   EVT VT = N->getValueType(0);
7109   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7110     return SDValue();
7111 
7112   // The two operand nodes must be SHL and have no other use.
7113   SDValue N0 = N->getOperand(0);
7114   SDValue N1 = N->getOperand(1);
7115   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
7116       !N0->hasOneUse() || !N1->hasOneUse())
7117     return SDValue();
7118 
7119   // Check c0 and c1.
7120   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7121   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
7122   if (!N0C || !N1C)
7123     return SDValue();
7124   int64_t C0 = N0C->getSExtValue();
7125   int64_t C1 = N1C->getSExtValue();
7126   if (C0 <= 0 || C1 <= 0)
7127     return SDValue();
7128 
7129   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
7130   int64_t Bits = std::min(C0, C1);
7131   int64_t Diff = std::abs(C0 - C1);
7132   if (Diff != 1 && Diff != 2 && Diff != 3)
7133     return SDValue();
7134 
7135   // Build nodes.
7136   SDLoc DL(N);
7137   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
7138   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
7139   SDValue NA0 =
7140       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
7141   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
7142   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
7143 }
7144 
7145 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
7146 // non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
7147 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
7148 // stage does not undo itself, but it is redundant.
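     // For example, (GREVI (GREVI x, 24), 24) folds to x, while
     // (GORCI (GORCI x, 1), 2) folds to (GORCI x, 3).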
7149 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
7150   SDValue Src = N->getOperand(0);
7151 
7152   if (Src.getOpcode() != N->getOpcode())
7153     return SDValue();
7154 
7155   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
7156       !isa<ConstantSDNode>(Src.getOperand(1)))
7157     return SDValue();
7158 
7159   unsigned ShAmt1 = N->getConstantOperandVal(1);
7160   unsigned ShAmt2 = Src.getConstantOperandVal(1);
7161   Src = Src.getOperand(0);
7162 
7163   unsigned CombinedShAmt;
7164   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
7165     CombinedShAmt = ShAmt1 | ShAmt2;
7166   else
7167     CombinedShAmt = ShAmt1 ^ ShAmt2;
7168 
7169   if (CombinedShAmt == 0)
7170     return Src;
7171 
7172   SDLoc DL(N);
7173   return DAG.getNode(
7174       N->getOpcode(), DL, N->getValueType(0), Src,
7175       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
7176 }
7177 
7178 // Combine a constant select operand into its use:
7179 //
7180 // (and (select cond, -1, c), x)
7181 //   -> (select cond, x, (and x, c))  [AllOnes=1]
7182 // (or  (select cond, 0, c), x)
7183 //   -> (select cond, x, (or x, c))  [AllOnes=0]
7184 // (xor (select cond, 0, c), x)
7185 //   -> (select cond, x, (xor x, c))  [AllOnes=0]
7186 // (add (select cond, 0, c), x)
7187 //   -> (select cond, x, (add x, c))  [AllOnes=0]
7188 // (sub x, (select cond, 0, c))
7189 //   -> (select cond, x, (sub x, c))  [AllOnes=0]
7190 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
7191                                    SelectionDAG &DAG, bool AllOnes) {
7192   EVT VT = N->getValueType(0);
7193 
7194   // Skip vectors.
7195   if (VT.isVector())
7196     return SDValue();
7197 
7198   if ((Slct.getOpcode() != ISD::SELECT &&
7199        Slct.getOpcode() != RISCVISD::SELECT_CC) ||
7200       !Slct.hasOneUse())
7201     return SDValue();
7202 
7203   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
7204     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
7205   };
7206 
7207   bool SwapSelectOps;
7208   unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
7209   SDValue TrueVal = Slct.getOperand(1 + OpOffset);
7210   SDValue FalseVal = Slct.getOperand(2 + OpOffset);
7211   SDValue NonConstantVal;
7212   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
7213     SwapSelectOps = false;
7214     NonConstantVal = FalseVal;
7215   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
7216     SwapSelectOps = true;
7217     NonConstantVal = TrueVal;
7218   } else
7219     return SDValue();
7220 
7221   // Slct is now known to be the desired identity constant when CC is true.
7222   TrueVal = OtherOp;
7223   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
7224   // Unless SwapSelectOps says the condition should be false.
7225   if (SwapSelectOps)
7226     std::swap(TrueVal, FalseVal);
7227 
7228   if (Slct.getOpcode() == RISCVISD::SELECT_CC)
7229     return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
7230                        {Slct.getOperand(0), Slct.getOperand(1),
7231                         Slct.getOperand(2), TrueVal, FalseVal});
7232 
7233   return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
7234                      {Slct.getOperand(0), TrueVal, FalseVal});
7235 }
7236 
7237 // Attempt combineSelectAndUse on each operand of a commutative operator N.
7238 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
7239                                               bool AllOnes) {
7240   SDValue N0 = N->getOperand(0);
7241   SDValue N1 = N->getOperand(1);
7242   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
7243     return Result;
7244   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
7245     return Result;
7246   return SDValue();
7247 }
7248 
7249 // Transform (add (mul x, c0), c1) ->
7250 //           (add (mul (add x, c1/c0), c0), c1%c0).
7251 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
7252 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
7253 // to an infinite loop in DAGCombine if transformed.
7254 // Or transform (add (mul x, c0), c1) ->
7255 //              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
7256 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
7257 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
7258 // lead to an infinite loop in DAGCombine if transformed.
7259 // Or transform (add (mul x, c0), c1) ->
7260 //              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
7261 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
7262 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
7263 // lead to an infinite loop in DAGCombine if transformed.
7264 // Or transform (add (mul x, c0), c1) ->
7265 //              (mul (add x, c1/c0), c0).
7266 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
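     // For example, (add (mul x, 100), 4099) becomes
     // (add (mul (add x, 40), 100), 99): 4099 does not fit in a simm12, but
     // 40 (4099/100) and 99 (4099%100) both do, so each addition can use an
     // addi immediate instead of materializing 4099 separately.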
7267 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
7268                                      const RISCVSubtarget &Subtarget) {
7269   // Skip for vector types and larger types.
7270   EVT VT = N->getValueType(0);
7271   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7272     return SDValue();
7273   // The first operand node must be a MUL and have no other use.
7274   SDValue N0 = N->getOperand(0);
7275   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
7276     return SDValue();
7277   // Check if c0 and c1 match above conditions.
7278   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7279   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
7280   if (!N0C || !N1C)
7281     return SDValue();
7282   int64_t C0 = N0C->getSExtValue();
7283   int64_t C1 = N1C->getSExtValue();
7284   int64_t CA, CB;
7285   if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
7286     return SDValue();
7287   // Search for proper CA (non-zero) and CB that both are simm12.
7288   if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
7289       !isInt<12>(C0 * (C1 / C0))) {
7290     CA = C1 / C0;
7291     CB = C1 % C0;
7292   } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
7293              isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
7294     CA = C1 / C0 + 1;
7295     CB = C1 % C0 - C0;
7296   } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
7297              isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
7298     CA = C1 / C0 - 1;
7299     CB = C1 % C0 + C0;
7300   } else
7301     return SDValue();
7302   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
7303   SDLoc DL(N);
7304   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
7305                              DAG.getConstant(CA, DL, VT));
7306   SDValue New1 =
7307       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
7308   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
7309 }
7310 
7311 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
7312                                  const RISCVSubtarget &Subtarget) {
7313   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
7314     return V;
7315   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
7316     return V;
7317   // fold (add (select lhs, rhs, cc, 0, y), x) ->
7318   //      (select lhs, rhs, cc, x, (add x, y))
7319   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7320 }
7321 
7322 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
7323   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
7324   //      (select lhs, rhs, cc, x, (sub x, y))
7325   SDValue N0 = N->getOperand(0);
7326   SDValue N1 = N->getOperand(1);
7327   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
7328 }
7329 
7330 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
7331   // fold (and (select lhs, rhs, cc, -1, y), x) ->
7332   //      (select lhs, rhs, cc, x, (and x, y))
7333   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
7334 }
7335 
7336 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
7337                                 const RISCVSubtarget &Subtarget) {
7338   if (Subtarget.hasStdExtZbp()) {
7339     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
7340       return GREV;
7341     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
7342       return GORC;
7343     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
7344       return SHFL;
7345   }
7346 
7347   // fold (or (select cond, 0, y), x) ->
7348   //      (select cond, x, (or x, y))
7349   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7350 }
7351 
7352 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
7353   // fold (xor (select cond, 0, y), x) ->
7354   //      (select cond, x, (xor x, y))
7355   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7356 }
7357 
7358 // Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
7359 // has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
7360 // by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
7361 // removed during type legalization leaving an ADD/SUB/MUL use that won't use
7362 // ADDW/SUBW/MULW.
7363 static SDValue performANY_EXTENDCombine(SDNode *N,
7364                                         TargetLowering::DAGCombinerInfo &DCI,
7365                                         const RISCVSubtarget &Subtarget) {
7366   if (!Subtarget.is64Bit())
7367     return SDValue();
7368 
7369   SelectionDAG &DAG = DCI.DAG;
7370 
7371   SDValue Src = N->getOperand(0);
7372   EVT VT = N->getValueType(0);
7373   if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
7374     return SDValue();
7375 
7376   // The opcode must be one that can implicitly sign_extend.
7377   // FIXME: Additional opcodes.
7378   switch (Src.getOpcode()) {
7379   default:
7380     return SDValue();
7381   case ISD::MUL:
7382     if (!Subtarget.hasStdExtM())
7383       return SDValue();
7384     LLVM_FALLTHROUGH;
7385   case ISD::ADD:
7386   case ISD::SUB:
7387     break;
7388   }
7389 
7390   // Only handle cases where the result is used by a CopyToReg. That likely
7391   // means the value is a liveout of the basic block. This helps prevent
7392   // infinite combine loops like PR51206.
7393   if (none_of(N->uses(),
7394               [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
7395     return SDValue();
7396 
7397   SmallVector<SDNode *, 4> SetCCs;
7398   for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
7399                             UE = Src.getNode()->use_end();
7400        UI != UE; ++UI) {
7401     SDNode *User = *UI;
7402     if (User == N)
7403       continue;
7404     if (UI.getUse().getResNo() != Src.getResNo())
7405       continue;
7406     // All i32 setccs are legalized by sign extending operands.
7407     if (User->getOpcode() == ISD::SETCC) {
7408       SetCCs.push_back(User);
7409       continue;
7410     }
7411     // We don't know if we can extend this user.
7412     break;
7413   }
7414 
7415   // If we don't have any SetCCs, this isn't worthwhile.
7416   if (SetCCs.empty())
7417     return SDValue();
7418 
7419   SDLoc DL(N);
7420   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
7421   DCI.CombineTo(N, SExt);
7422 
7423   // Promote all the setccs.
7424   for (SDNode *SetCC : SetCCs) {
7425     SmallVector<SDValue, 4> Ops;
7426 
7427     for (unsigned j = 0; j != 2; ++j) {
7428       SDValue SOp = SetCC->getOperand(j);
7429       if (SOp == Src)
7430         Ops.push_back(SExt);
7431       else
7432         Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
7433     }
7434 
7435     Ops.push_back(SetCC->getOperand(2));
7436     DCI.CombineTo(SetCC,
7437                   DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7438   }
7439   return SDValue(N, 0);
7440 }
7441 
7442 // Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
7443 // vwadd(u).vv/vx or vwsub(u).vv/vx.
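     // For example, (ADD_VL X:nxv2i64, (VSEXT_VL Y:nxv2i32, mask, vl), mask, vl)
     // can be rewritten as (VWADD_W_VL X, Y, mask, vl), provided the extend has
     // no other users.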
7444 static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
7445                                              bool Commute = false) {
7446   assert((N->getOpcode() == RISCVISD::ADD_VL ||
7447           N->getOpcode() == RISCVISD::SUB_VL) &&
7448          "Unexpected opcode");
7449   bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
7450   SDValue Op0 = N->getOperand(0);
7451   SDValue Op1 = N->getOperand(1);
7452   if (Commute)
7453     std::swap(Op0, Op1);
7454 
7455   MVT VT = N->getSimpleValueType(0);
7456 
7457   // Determine the narrow size for a widening add/sub.
7458   unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
7459   MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
7460                                   VT.getVectorElementCount());
7461 
7462   SDValue Mask = N->getOperand(2);
7463   SDValue VL = N->getOperand(3);
7464 
7465   SDLoc DL(N);
7466 
7467   // If the RHS is a sext or zext, we can form a widening op.
7468   if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
7469        Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
7470       Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
7471     unsigned ExtOpc = Op1.getOpcode();
7472     Op1 = Op1.getOperand(0);
7473     // Re-introduce narrower extends if needed.
7474     if (Op1.getValueType() != NarrowVT)
7475       Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
7476 
7477     unsigned WOpc;
7478     if (ExtOpc == RISCVISD::VSEXT_VL)
7479       WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
7480     else
7481       WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;
7482 
7483     return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL);
7484   }
7485 
7486   // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
7487   // sext/zext?
7488 
7489   return SDValue();
7490 }
7491 
7492 // Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
7493 // vwsub(u).vv/vx.
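     // For example, (VWADD_W_VL (VSEXT_VL X, Mask, VL), Y, Mask, VL) becomes
     // (VWADD_VL X, Y, Mask, VL) once X has been narrowed to Y's element type.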
7494 static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
7495   SDValue Op0 = N->getOperand(0);
7496   SDValue Op1 = N->getOperand(1);
7497   SDValue Mask = N->getOperand(2);
7498   SDValue VL = N->getOperand(3);
7499 
7500   MVT VT = N->getSimpleValueType(0);
7501   MVT NarrowVT = Op1.getSimpleValueType();
7502   unsigned NarrowSize = NarrowVT.getScalarSizeInBits();
7503 
7504   unsigned VOpc;
7505   switch (N->getOpcode()) {
7506   default: llvm_unreachable("Unexpected opcode");
7507   case RISCVISD::VWADD_W_VL:  VOpc = RISCVISD::VWADD_VL;  break;
7508   case RISCVISD::VWSUB_W_VL:  VOpc = RISCVISD::VWSUB_VL;  break;
7509   case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
7510   case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
7511   }
7512 
7513   bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
7514                   N->getOpcode() == RISCVISD::VWSUB_W_VL;
7515 
7516   SDLoc DL(N);
7517 
7518   // If the LHS is a sext or zext, we can narrow this op to the same size as
7519   // the RHS.
7520   if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
7521        (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
7522       Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
7523     unsigned ExtOpc = Op0.getOpcode();
7524     Op0 = Op0.getOperand(0);
7525     // Re-introduce narrower extends if needed.
7526     if (Op0.getValueType() != NarrowVT)
7527       Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
7528     return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL);
7529   }
7530 
7531   bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
7532                N->getOpcode() == RISCVISD::VWADDU_W_VL;
7533 
7534   // Look for splats on the left hand side of a vwadd(u).wv. We might be able
7535   // to commute and use a vwadd(u).vx instead.
7536   if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
7537       Op0.getOperand(1) == VL) {
7538     Op0 = Op0.getOperand(0);
7539 
7540     // See if we have enough sign bits or zero bits in the scalar to use a
7541     // widening add/sub by splatting to a smaller element size.
7542     unsigned EltBits = VT.getScalarSizeInBits();
7543     unsigned ScalarBits = Op0.getValueSizeInBits();
7544     // Make sure we're getting all element bits from the scalar register.
7545     // FIXME: Support implicit sign extension of vmv.v.x?
7546     if (ScalarBits < EltBits)
7547       return SDValue();
7548 
7549     if (IsSigned) {
7550       if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
7551         return SDValue();
7552     } else {
7553       APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
7554       if (!DAG.MaskedValueIsZero(Op0, Mask))
7555         return SDValue();
7556     }
7557 
7558     Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op0, VL);
7559     return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
7560   }
7561 
7562   return SDValue();
7563 }
7564 
7565 // Try to form VWMUL, VWMULU or VWMULSU.
7566 // TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
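     // For example, (MUL_VL (VSEXT_VL X), (VSEXT_VL Y)) becomes (VWMUL_VL X, Y),
     // a matching pair of zexts becomes VWMULU_VL, a sext/zext pair becomes
     // VWMULSU_VL, and a splatted scalar operand with enough sign/zero bits is
     // narrowed so the .vx forms can be used.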
7567 static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
7568                                        bool Commute) {
7569   assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
7570   SDValue Op0 = N->getOperand(0);
7571   SDValue Op1 = N->getOperand(1);
7572   if (Commute)
7573     std::swap(Op0, Op1);
7574 
7575   bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
7576   bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
7577   bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
7578   if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
7579     return SDValue();
7580 
7581   SDValue Mask = N->getOperand(2);
7582   SDValue VL = N->getOperand(3);
7583 
7584   // Make sure the mask and VL match.
7585   if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
7586     return SDValue();
7587 
7588   MVT VT = N->getSimpleValueType(0);
7589 
7590   // Determine the narrow size for a widening multiply.
7591   unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
7592   MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
7593                                   VT.getVectorElementCount());
7594 
7595   SDLoc DL(N);
7596 
7597   // See if the other operand has the same extend opcode.
7598   if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
7599     if (!Op1.hasOneUse())
7600       return SDValue();
7601 
7602     // Make sure the mask and VL match.
7603     if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
7604       return SDValue();
7605 
7606     Op1 = Op1.getOperand(0);
7607   } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
7608     // The operand is a splat of a scalar.
7609 
7610     // The VL must be the same.
7611     if (Op1.getOperand(1) != VL)
7612       return SDValue();
7613 
7614     // Get the scalar value.
7615     Op1 = Op1.getOperand(0);
7616 
7617     // See if we have enough sign bits or zero bits in the scalar to use a
7618     // widening multiply by splatting to a smaller element size.
7619     unsigned EltBits = VT.getScalarSizeInBits();
7620     unsigned ScalarBits = Op1.getValueSizeInBits();
7621     // Make sure we're getting all element bits from the scalar register.
7622     // FIXME: Support implicit sign extension of vmv.v.x?
7623     if (ScalarBits < EltBits)
7624       return SDValue();
7625 
7626     if (IsSignExt) {
7627       if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
7628         return SDValue();
7629     } else {
7630       APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
7631       if (!DAG.MaskedValueIsZero(Op1, Mask))
7632         return SDValue();
7633     }
7634 
7635     Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
7636   } else
7637     return SDValue();
7638 
7639   Op0 = Op0.getOperand(0);
7640 
7641   // Re-introduce narrower extends if needed.
7642   unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
7643   if (Op0.getValueType() != NarrowVT)
7644     Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
7645   if (Op1.getValueType() != NarrowVT)
7646     Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
7647 
7648   unsigned WMulOpc = RISCVISD::VWMULSU_VL;
7649   if (!IsVWMULSU)
7650     WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
7651   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
7652 }
7653 
7654 static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
7655   switch (Op.getOpcode()) {
7656   case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
7657   case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
7658   case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
7659   case ISD::FCEIL:      return RISCVFPRndMode::RUP;
7660   case ISD::FROUND:     return RISCVFPRndMode::RMM;
7661   }
7662 
7663   return RISCVFPRndMode::Invalid;
7664 }
7665 
7666 // Fold
7667 //   (fp_to_int (froundeven X)) -> fcvt X, rne
7668 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
7669 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
7670 //   (fp_to_int (fceil X))      -> fcvt X, rup
7671 //   (fp_to_int (fround X))     -> fcvt X, rmm
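     // The chosen rounding mode is encoded as a static operand of the FCVT node
     // rather than being read from the dynamic FRM register.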
7672 static SDValue performFP_TO_INTCombine(SDNode *N,
7673                                        TargetLowering::DAGCombinerInfo &DCI,
7674                                        const RISCVSubtarget &Subtarget) {
7675   SelectionDAG &DAG = DCI.DAG;
7676   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7677   MVT XLenVT = Subtarget.getXLenVT();
7678 
7679   // Only handle XLen or i32 types. Other types narrower than XLen will
7680   // eventually be legalized to XLenVT.
7681   EVT VT = N->getValueType(0);
7682   if (VT != MVT::i32 && VT != XLenVT)
7683     return SDValue();
7684 
7685   SDValue Src = N->getOperand(0);
7686 
7687   // Ensure the FP type is also legal.
7688   if (!TLI.isTypeLegal(Src.getValueType()))
7689     return SDValue();
7690 
7691   // Don't do this for f16 when we only have Zfhmin and not Zfh.
7692   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
7693     return SDValue();
7694 
7695   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
7696   if (FRM == RISCVFPRndMode::Invalid)
7697     return SDValue();
7698 
7699   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
7700 
7701   unsigned Opc;
7702   if (VT == XLenVT)
7703     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
7704   else
7705     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7706 
7707   SDLoc DL(N);
7708   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
7709                                 DAG.getTargetConstant(FRM, DL, XLenVT));
7710   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
7711 }
7712 
7713 // Fold
7714 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
7715 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
7716 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
7717 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
7718 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
7719 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
7720                                        TargetLowering::DAGCombinerInfo &DCI,
7721                                        const RISCVSubtarget &Subtarget) {
7722   SelectionDAG &DAG = DCI.DAG;
7723   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7724   MVT XLenVT = Subtarget.getXLenVT();
7725 
7726   // Only handle XLen types. Other types narrower than XLen will eventually be
7727   // legalized to XLenVT.
7728   EVT DstVT = N->getValueType(0);
7729   if (DstVT != XLenVT)
7730     return SDValue();
7731 
7732   SDValue Src = N->getOperand(0);
7733 
7734   // Ensure the FP type is also legal.
7735   if (!TLI.isTypeLegal(Src.getValueType()))
7736     return SDValue();
7737 
7738   // Don't do this for f16 when we only have Zfhmin and not Zfh.
7739   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
7740     return SDValue();
7741 
7742   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7743 
7744   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
7745   if (FRM == RISCVFPRndMode::Invalid)
7746     return SDValue();
7747 
7748   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
7749 
7750   unsigned Opc;
7751   if (SatVT == DstVT)
7752     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
7753   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
7754     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7755   else
7756     return SDValue();
7757   // FIXME: Support other SatVTs by clamping before or after the conversion.
7758 
7759   Src = Src.getOperand(0);
7760 
7761   SDLoc DL(N);
7762   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
7763                                 DAG.getTargetConstant(FRM, DL, XLenVT));
7764 
7765   // RISCV FP-to-int conversions saturate to the destination register size, but
7766   // don't produce 0 for NaN.
7767   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
7768   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
7769 }
7770 
7771 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
7772                                                DAGCombinerInfo &DCI) const {
7773   SelectionDAG &DAG = DCI.DAG;
7774 
7775   // Helper to call SimplifyDemandedBits on an operand of N where only some low
7776   // bits are demanded. N will be added to the Worklist if it was not deleted.
7777   // Caller should return SDValue(N, 0) if this returns true.
7778   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
7779     SDValue Op = N->getOperand(OpNo);
7780     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
7781     if (!SimplifyDemandedBits(Op, Mask, DCI))
7782       return false;
7783 
7784     if (N->getOpcode() != ISD::DELETED_NODE)
7785       DCI.AddToWorklist(N);
7786     return true;
7787   };
7788 
7789   switch (N->getOpcode()) {
7790   default:
7791     break;
7792   case RISCVISD::SplitF64: {
7793     SDValue Op0 = N->getOperand(0);
7794     // If the input to SplitF64 is just BuildPairF64 then the operation is
7795     // redundant. Instead, use BuildPairF64's operands directly.
7796     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
7797       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7798 
7799     SDLoc DL(N);
7800 
7801     // It's cheaper to materialise two 32-bit integers than to load a double
7802     // from the constant pool and transfer it to integer registers through the
7803     // stack.
7804     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
7805       APInt V = C->getValueAPF().bitcastToAPInt();
7806       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7807       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7808       return DCI.CombineTo(N, Lo, Hi);
7809     }
7810 
7811     // This is a target-specific version of a DAGCombine performed in
7812     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7813     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7814     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7815     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7816         !Op0.getNode()->hasOneUse())
7817       break;
7818     SDValue NewSplitF64 =
7819         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
7820                     Op0.getOperand(0));
7821     SDValue Lo = NewSplitF64.getValue(0);
7822     SDValue Hi = NewSplitF64.getValue(1);
7823     APInt SignBit = APInt::getSignMask(32);
7824     if (Op0.getOpcode() == ISD::FNEG) {
7825       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
7826                                   DAG.getConstant(SignBit, DL, MVT::i32));
7827       return DCI.CombineTo(N, Lo, NewHi);
7828     }
7829     assert(Op0.getOpcode() == ISD::FABS);
7830     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
7831                                 DAG.getConstant(~SignBit, DL, MVT::i32));
7832     return DCI.CombineTo(N, Lo, NewHi);
7833   }
7834   case RISCVISD::SLLW:
7835   case RISCVISD::SRAW:
7836   case RISCVISD::SRLW:
7837   case RISCVISD::ROLW:
7838   case RISCVISD::RORW: {
7839     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7840     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7841         SimplifyDemandedLowBitsHelper(1, 5))
7842       return SDValue(N, 0);
7843     break;
7844   }
7845   case RISCVISD::CLZW:
7846   case RISCVISD::CTZW: {
7847     // Only the lower 32 bits of the first operand are read.
7848     if (SimplifyDemandedLowBitsHelper(0, 32))
7849       return SDValue(N, 0);
7850     break;
7851   }
7852   case RISCVISD::GREV:
7853   case RISCVISD::GORC: {
7854     // Only the lower log2(BitWidth) bits of the shift amount are read.
7855     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7856     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7857     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
7858       return SDValue(N, 0);
7859 
7860     return combineGREVI_GORCI(N, DAG);
7861   }
7862   case RISCVISD::GREVW:
7863   case RISCVISD::GORCW: {
7864     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7865     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7866         SimplifyDemandedLowBitsHelper(1, 5))
7867       return SDValue(N, 0);
7868 
7869     return combineGREVI_GORCI(N, DAG);
7870   }
7871   case RISCVISD::SHFL:
7872   case RISCVISD::UNSHFL: {
7873     // Only the lower log2(BitWidth)-1 bits of the shift amount are read.
7874     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7875     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7876     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
7877       return SDValue(N, 0);
7878 
7879     break;
7880   }
7881   case RISCVISD::SHFLW:
7882   case RISCVISD::UNSHFLW: {
7883     // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
7884     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7885         SimplifyDemandedLowBitsHelper(1, 4))
7886       return SDValue(N, 0);
7887 
7888     break;
7889   }
7890   case RISCVISD::BCOMPRESSW:
7891   case RISCVISD::BDECOMPRESSW: {
7892     // Only the lower 32 bits of LHS and RHS are read.
7893     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7894         SimplifyDemandedLowBitsHelper(1, 32))
7895       return SDValue(N, 0);
7896 
7897     break;
7898   }
7899   case RISCVISD::FMV_X_ANYEXTH:
7900   case RISCVISD::FMV_X_ANYEXTW_RV64: {
7901     SDLoc DL(N);
7902     SDValue Op0 = N->getOperand(0);
7903     MVT VT = N->getSimpleValueType(0);
7904     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
7905     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
7906     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
7907     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
7908          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
7909         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
7910          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
7911       assert(Op0.getOperand(0).getValueType() == VT &&
7912              "Unexpected value type!");
7913       return Op0.getOperand(0);
7914     }
7915 
7916     // This is a target-specific version of a DAGCombine performed in
7917     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7918     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7919     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7920     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7921         !Op0.getNode()->hasOneUse())
7922       break;
7923     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
7924     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
7925     APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
7926     if (Op0.getOpcode() == ISD::FNEG)
7927       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
7928                          DAG.getConstant(SignBit, DL, VT));
7929 
7930     assert(Op0.getOpcode() == ISD::FABS);
7931     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
7932                        DAG.getConstant(~SignBit, DL, VT));
7933   }
7934   case ISD::ADD:
7935     return performADDCombine(N, DAG, Subtarget);
7936   case ISD::SUB:
7937     return performSUBCombine(N, DAG);
7938   case ISD::AND:
7939     return performANDCombine(N, DAG);
7940   case ISD::OR:
7941     return performORCombine(N, DAG, Subtarget);
7942   case ISD::XOR:
7943     return performXORCombine(N, DAG);
7944   case ISD::ANY_EXTEND:
7945     return performANY_EXTENDCombine(N, DCI, Subtarget);
7946   case ISD::ZERO_EXTEND:
7947     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
7948     // type legalization. This is safe because fp_to_uint produces poison if
7949     // it overflows.
7950     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
7951       SDValue Src = N->getOperand(0);
7952       if (Src.getOpcode() == ISD::FP_TO_UINT &&
7953           isTypeLegal(Src.getOperand(0).getValueType()))
7954         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
7955                            Src.getOperand(0));
7956       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
7957           isTypeLegal(Src.getOperand(1).getValueType())) {
7958         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
7959         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
7960                                   Src.getOperand(0), Src.getOperand(1));
7961         DCI.CombineTo(N, Res);
7962         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
7963         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
7964         return SDValue(N, 0); // Return N so it doesn't get rechecked.
7965       }
7966     }
7967     return SDValue();
7968   case RISCVISD::SELECT_CC: {
7969     // Transform select_cc into simpler forms (see the folds below).
7970     SDValue LHS = N->getOperand(0);
7971     SDValue RHS = N->getOperand(1);
7972     SDValue TrueV = N->getOperand(3);
7973     SDValue FalseV = N->getOperand(4);
7974 
7975     // If the True and False values are the same, we don't need a select_cc.
7976     if (TrueV == FalseV)
7977       return TrueV;
7978 
7979     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
7980     if (!ISD::isIntEqualitySetCC(CCVal))
7981       break;
7982 
7983     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
7984     //      (select_cc X, Y, lt, trueV, falseV)
7985     // Sometimes the setcc is introduced after select_cc has been formed.
7986     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7987         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
7988       // If we're looking for eq 0 instead of ne 0, we need to invert the
7989       // condition.
7990       bool Invert = CCVal == ISD::SETEQ;
7991       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7992       if (Invert)
7993         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7994 
7995       SDLoc DL(N);
7996       RHS = LHS.getOperand(1);
7997       LHS = LHS.getOperand(0);
7998       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7999 
8000       SDValue TargetCC = DAG.getCondCode(CCVal);
8001       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
8002                          {LHS, RHS, TargetCC, TrueV, FalseV});
8003     }
8004 
8005     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
8006     //      (select_cc X, Y, eq/ne, trueV, falseV)
8007     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
8008       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
8009                          {LHS.getOperand(0), LHS.getOperand(1),
8010                           N->getOperand(2), TrueV, FalseV});
8011     // (select_cc X, 1, setne, trueV, falseV) ->
8012     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
8013     // This can occur when legalizing some floating point comparisons.
8014     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
8015     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
8016       SDLoc DL(N);
8017       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8018       SDValue TargetCC = DAG.getCondCode(CCVal);
8019       RHS = DAG.getConstant(0, DL, LHS.getValueType());
8020       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
8021                          {LHS, RHS, TargetCC, TrueV, FalseV});
8022     }
8023 
8024     break;
8025   }
8026   case RISCVISD::BR_CC: {
8027     SDValue LHS = N->getOperand(1);
8028     SDValue RHS = N->getOperand(2);
8029     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
8030     if (!ISD::isIntEqualitySetCC(CCVal))
8031       break;
8032 
8033     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
8034     //      (br_cc X, Y, lt, dest)
8035     // Sometimes the setcc is introduced after br_cc has been formed.
8036     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
8037         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
8038       // If we're looking for eq 0 instead of ne 0, we need to invert the
8039       // condition.
8040       bool Invert = CCVal == ISD::SETEQ;
8041       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
8042       if (Invert)
8043         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8044 
8045       SDLoc DL(N);
8046       RHS = LHS.getOperand(1);
8047       LHS = LHS.getOperand(0);
8048       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8049 
8050       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
8051                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
8052                          N->getOperand(4));
8053     }
8054 
8055     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
8056     //      (br_cc X, Y, eq/ne, dest)
8057     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
8058       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
8059                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
8060                          N->getOperand(3), N->getOperand(4));
8061 
8062     // (br_cc X, 1, setne, dest) ->
8063     // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
8064     // This can occur when legalizing some floating point comparisons.
8065     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
8066     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
8067       SDLoc DL(N);
8068       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8069       SDValue TargetCC = DAG.getCondCode(CCVal);
8070       RHS = DAG.getConstant(0, DL, LHS.getValueType());
8071       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
8072                          N->getOperand(0), LHS, RHS, TargetCC,
8073                          N->getOperand(4));
8074     }
8075     break;
8076   }
8077   case ISD::FP_TO_SINT:
8078   case ISD::FP_TO_UINT:
8079     return performFP_TO_INTCombine(N, DCI, Subtarget);
8080   case ISD::FP_TO_SINT_SAT:
8081   case ISD::FP_TO_UINT_SAT:
8082     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
8083   case ISD::FCOPYSIGN: {
8084     EVT VT = N->getValueType(0);
8085     if (!VT.isVector())
8086       break;
8087     // There is a form of VFSGNJ which injects the negated sign of its second
8088     // operand. Try and bubble any FNEG up after the extend/round to produce
8089     // this optimized pattern. Avoid modifying cases where the FP_ROUND has
8090     // TRUNC==1; only FP_EXTEND and FP_ROUND with TRUNC==0 are handled.
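         // For example, (fcopysign X, (fp_extend (fneg Y))) is rewritten to
         // (fcopysign X, (fneg (fp_extend Y))), which can later be matched to
         // the sign-negating form of VFSGNJ.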
8091     SDValue In2 = N->getOperand(1);
8092     // Avoid cases where the extend/round has multiple uses, as duplicating
8093     // those is typically more expensive than removing a fneg.
8094     if (!In2.hasOneUse())
8095       break;
8096     if (In2.getOpcode() != ISD::FP_EXTEND &&
8097         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
8098       break;
8099     In2 = In2.getOperand(0);
8100     if (In2.getOpcode() != ISD::FNEG)
8101       break;
8102     SDLoc DL(N);
8103     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
8104     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
8105                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
8106   }
8107   case ISD::MGATHER:
8108   case ISD::MSCATTER:
8109   case ISD::VP_GATHER:
8110   case ISD::VP_SCATTER: {
8111     if (!DCI.isBeforeLegalize())
8112       break;
8113     SDValue Index, ScaleOp;
8114     bool IsIndexScaled = false;
8115     bool IsIndexSigned = false;
8116     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
8117       Index = VPGSN->getIndex();
8118       ScaleOp = VPGSN->getScale();
8119       IsIndexScaled = VPGSN->isIndexScaled();
8120       IsIndexSigned = VPGSN->isIndexSigned();
8121     } else {
8122       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
8123       Index = MGSN->getIndex();
8124       ScaleOp = MGSN->getScale();
8125       IsIndexScaled = MGSN->isIndexScaled();
8126       IsIndexSigned = MGSN->isIndexSigned();
8127     }
8128     EVT IndexVT = Index.getValueType();
8129     MVT XLenVT = Subtarget.getXLenVT();
8130     // RISCV indexed loads and stores only support the "unsigned unscaled"
8131     // addressing mode, so anything else must be manually legalized.
8132     bool NeedsIdxLegalization =
8133         IsIndexScaled ||
8134         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
8135     if (!NeedsIdxLegalization)
8136       break;
8137 
8138     SDLoc DL(N);
8139 
8140     // Any index legalization should first promote to XLenVT, so we don't lose
8141     // bits when scaling. This may create an illegal index type so we let
8142     // LLVM's legalization take care of the splitting.
8143     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
8144     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
8145       IndexVT = IndexVT.changeVectorElementType(XLenVT);
8146       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
8147                           DL, IndexVT, Index);
8148     }
8149 
8150     unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
8151     if (IsIndexScaled && Scale != 1) {
8152       // Manually scale the indices by the element size.
8153       // TODO: Sanitize the scale operand here?
8154       // TODO: For VP nodes, should we use VP_SHL here?
8155       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
8156       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
8157       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
8158     }
8159 
8160     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
8161     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
8162       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
8163                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
8164                               VPGN->getScale(), VPGN->getMask(),
8165                               VPGN->getVectorLength()},
8166                              VPGN->getMemOperand(), NewIndexTy);
8167     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
8168       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
8169                               {VPSN->getChain(), VPSN->getValue(),
8170                                VPSN->getBasePtr(), Index, VPSN->getScale(),
8171                                VPSN->getMask(), VPSN->getVectorLength()},
8172                               VPSN->getMemOperand(), NewIndexTy);
8173     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
8174       return DAG.getMaskedGather(
8175           N->getVTList(), MGN->getMemoryVT(), DL,
8176           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
8177            MGN->getBasePtr(), Index, MGN->getScale()},
8178           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
8179     const auto *MSN = cast<MaskedScatterSDNode>(N);
8180     return DAG.getMaskedScatter(
8181         N->getVTList(), MSN->getMemoryVT(), DL,
8182         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
8183          Index, MSN->getScale()},
8184         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
8185   }
8186   case RISCVISD::SRA_VL:
8187   case RISCVISD::SRL_VL:
8188   case RISCVISD::SHL_VL: {
8189     SDValue ShAmt = N->getOperand(1);
8190     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
8191       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
8192       SDLoc DL(N);
8193       SDValue VL = N->getOperand(3);
8194       EVT VT = N->getValueType(0);
8195       ShAmt =
8196           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
8197       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
8198                          N->getOperand(2), N->getOperand(3));
8199     }
8200     break;
8201   }
8202   case ISD::SRA:
8203   case ISD::SRL:
8204   case ISD::SHL: {
8205     SDValue ShAmt = N->getOperand(1);
8206     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
8207       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
8208       SDLoc DL(N);
8209       EVT VT = N->getValueType(0);
8210       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0),
8211                           DAG.getTargetConstant(RISCV::VLMaxSentinel, DL,
8212                                                 Subtarget.getXLenVT()));
8213       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
8214     }
8215     break;
8216   }
8217   case RISCVISD::ADD_VL:
8218     if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
8219       return V;
8220     return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
8221   case RISCVISD::SUB_VL:
8222     return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
8223   case RISCVISD::VWADD_W_VL:
8224   case RISCVISD::VWADDU_W_VL:
8225   case RISCVISD::VWSUB_W_VL:
8226   case RISCVISD::VWSUBU_W_VL:
8227     return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
8228   case RISCVISD::MUL_VL:
8229     if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
8230       return V;
8231     // Mul is commutative.
8232     return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
8233   case ISD::STORE: {
8234     auto *Store = cast<StoreSDNode>(N);
8235     SDValue Val = Store->getValue();
8236     // Combine store of vmv.x.s to vse with VL of 1.
8237     // FIXME: Support FP.
8238     if (Val.getOpcode() == RISCVISD::VMV_X_S) {
8239       SDValue Src = Val.getOperand(0);
8240       EVT VecVT = Src.getValueType();
8241       EVT MemVT = Store->getMemoryVT();
8242       // The memory VT and the element type must match.
8243       if (VecVT.getVectorElementType() == MemVT) {
8244         SDLoc DL(N);
8245         MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
8246         return DAG.getStoreVP(
8247             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
8248             DAG.getConstant(1, DL, MaskVT),
8249             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
8250             Store->getMemOperand(), Store->getAddressingMode(),
8251             Store->isTruncatingStore(), /*IsCompress*/ false);
8252       }
8253     }
8254 
8255     break;
8256   }
8257   }
8258 
8259   return SDValue();
8260 }
8261 
8262 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
8263     const SDNode *N, CombineLevel Level) const {
8264   // The following folds are only desirable if `(OP _, c1 << c2)` can be
8265   // materialised in fewer instructions than `(OP _, c1)`:
8266   //
8267   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8268   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
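       //
       // As an illustrative example: with c1 = 1 and c2 = 3, c1 << c2 = 8 still
       // fits in an ADDI immediate, so the fold is allowed; with c1 = 2047 and
       // c2 = 4, c1 << c2 = 32752 no longer fits while c1 itself does, so the
       // fold is rejected.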
8269   SDValue N0 = N->getOperand(0);
8270   EVT Ty = N0.getValueType();
8271   if (Ty.isScalarInteger() &&
8272       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
8273     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
8274     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
8275     if (C1 && C2) {
8276       const APInt &C1Int = C1->getAPIntValue();
8277       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
8278 
8279       // We can materialise `c1 << c2` into an add immediate, so it's "free",
8280       // and the combine should happen, to potentially allow further combines
8281       // later.
8282       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
8283           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
8284         return true;
8285 
8286       // We can materialise `c1` in an add immediate, so it's "free", and the
8287       // combine should be prevented.
8288       if (C1Int.getMinSignedBits() <= 64 &&
8289           isLegalAddImmediate(C1Int.getSExtValue()))
8290         return false;
8291 
8292       // Neither constant will fit into an immediate, so find materialisation
8293       // costs.
8294       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
8295                                               Subtarget.getFeatureBits(),
8296                                               /*CompressionCost*/true);
8297       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
8298           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
8299           /*CompressionCost*/true);
8300 
8301       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
8302       // combine should be prevented.
8303       if (C1Cost < ShiftedC1Cost)
8304         return false;
8305     }
8306   }
8307   return true;
8308 }
8309 
8310 bool RISCVTargetLowering::targetShrinkDemandedConstant(
8311     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
8312     TargetLoweringOpt &TLO) const {
8313   // Delay this optimization as late as possible.
8314   if (!TLO.LegalOps)
8315     return false;
8316 
8317   EVT VT = Op.getValueType();
8318   if (VT.isVector())
8319     return false;
8320 
8321   // Only handle AND for now.
8322   if (Op.getOpcode() != ISD::AND)
8323     return false;
8324 
8325   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
8326   if (!C)
8327     return false;
8328 
8329   const APInt &Mask = C->getAPIntValue();
8330 
8331   // Clear all non-demanded bits initially.
8332   APInt ShrunkMask = Mask & DemandedBits;
8333 
8334   // Try to make a smaller immediate by setting undemanded bits.
8335 
8336   APInt ExpandedMask = Mask | ~DemandedBits;
8337 
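       // Any mask that contains ShrunkMask and is contained in ExpandedMask
       // agrees with the original mask on every demanded bit, so it is a legal
       // replacement.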
8338   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
8339     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
8340   };
8341   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
8342     if (NewMask == Mask)
8343       return true;
8344     SDLoc DL(Op);
8345     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
8346     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
8347     return TLO.CombineTo(Op, NewOp);
8348   };
8349 
8350   // If the shrunk mask fits in a sign-extended 12-bit immediate, let the
8351   // target-independent code apply it.
8352   if (ShrunkMask.isSignedIntN(12))
8353     return false;
8354 
8355   // Preserve (and X, 0xffff) when zext.h is supported.
8356   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
8357     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
8358     if (IsLegalMask(NewMask))
8359       return UseMask(NewMask);
8360   }
8361 
8362   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
8363   if (VT == MVT::i64) {
8364     APInt NewMask = APInt(64, 0xffffffff);
8365     if (IsLegalMask(NewMask))
8366       return UseMask(NewMask);
8367   }
8368 
8369   // For the remaining optimizations, we need to be able to make a negative
8370   // number through a combination of mask and undemanded bits.
8371   if (!ExpandedMask.isNegative())
8372     return false;
8373 
8374   // Compute the fewest number of bits needed to represent the negative number.
8375   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
8376 
8377   // Try to make a 12-bit negative immediate. If that fails, try to make a
8378   // 32-bit negative immediate unless the shrunk immediate already fits in 32 bits.
8379   APInt NewMask = ShrunkMask;
8380   if (MinSignedBits <= 12)
8381     NewMask.setBitsFrom(11);
8382   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
8383     NewMask.setBitsFrom(31);
8384   else
8385     return false;
8386 
8387   // Check that our new mask is a subset of the demanded mask.
8388   assert(IsLegalMask(NewMask));
8389   return UseMask(NewMask);
8390 }
8391 
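     // Apply the GREV (generalized bit-reverse) permutation selected by ShAmt
     // to the constant in Src. For example, on a 64-bit value ShAmt == 7
     // reverses the bits within each byte and ShAmt == 56 reverses the byte
     // order.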
8392 static void computeGREV(APInt &Src, unsigned ShAmt) {
8393   ShAmt &= Src.getBitWidth() - 1;
8394   uint64_t x = Src.getZExtValue();
8395   if (ShAmt & 1)
8396     x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
8397   if (ShAmt & 2)
8398     x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
8399   if (ShAmt & 4)
8400     x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
8401   if (ShAmt & 8)
8402     x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
8403   if (ShAmt & 16)
8404     x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
8405   if (ShAmt & 32)
8406     x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
8407   Src = x;
8408 }
8409 
8410 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8411                                                         KnownBits &Known,
8412                                                         const APInt &DemandedElts,
8413                                                         const SelectionDAG &DAG,
8414                                                         unsigned Depth) const {
8415   unsigned BitWidth = Known.getBitWidth();
8416   unsigned Opc = Op.getOpcode();
8417   assert((Opc >= ISD::BUILTIN_OP_END ||
8418           Opc == ISD::INTRINSIC_WO_CHAIN ||
8419           Opc == ISD::INTRINSIC_W_CHAIN ||
8420           Opc == ISD::INTRINSIC_VOID) &&
8421          "Should use MaskedValueIsZero if you don't know whether Op"
8422          " is a target node!");
8423 
8424   Known.resetAll();
8425   switch (Opc) {
8426   default: break;
8427   case RISCVISD::SELECT_CC: {
8428     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
8429     // If we don't know any bits, early out.
8430     if (Known.isUnknown())
8431       break;
8432     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
8433 
8434     // Only known if known in both the LHS and RHS.
8435     Known = KnownBits::commonBits(Known, Known2);
8436     break;
8437   }
8438   case RISCVISD::REMUW: {
8439     KnownBits Known2;
8440     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
8441     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
8442     // We only care about the lower 32 bits.
8443     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
8444     // Restore the original width by sign extending.
8445     Known = Known.sext(BitWidth);
8446     break;
8447   }
8448   case RISCVISD::DIVUW: {
8449     KnownBits Known2;
8450     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
8451     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
8452     // We only care about the lower 32 bits.
8453     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
8454     // Restore the original width by sign extending.
8455     Known = Known.sext(BitWidth);
8456     break;
8457   }
8458   case RISCVISD::CTZW: {
8459     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8460     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
8461     unsigned LowBits = Log2_32(PossibleTZ) + 1;
8462     Known.Zero.setBitsFrom(LowBits);
8463     break;
8464   }
8465   case RISCVISD::CLZW: {
8466     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8467     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
8468     unsigned LowBits = Log2_32(PossibleLZ) + 1;
8469     Known.Zero.setBitsFrom(LowBits);
8470     break;
8471   }
8472   case RISCVISD::GREV:
8473   case RISCVISD::GREVW: {
8474     if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8475       Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8476       if (Opc == RISCVISD::GREVW)
8477         Known = Known.trunc(32);
8478       unsigned ShAmt = C->getZExtValue();
8479       computeGREV(Known.Zero, ShAmt);
8480       computeGREV(Known.One, ShAmt);
8481       if (Opc == RISCVISD::GREVW)
8482         Known = Known.sext(BitWidth);
8483     }
8484     break;
8485   }
8486   case RISCVISD::READ_VLENB: {
8487     // If we know the minimum VLen from Zvl extensions, we can use that to
8488     // determine the trailing zeros of VLENB.
8489     // FIXME: Limit to 128 bit vectors until we have more testing.
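         // For example, with Zvl128b the minimum VLENB is 128 / 8 = 16, so the
         // low 4 bits of VLENB are known to be zero.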
8490     unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
8491     if (MinVLenB > 0)
8492       Known.Zero.setLowBits(Log2_32(MinVLenB));
8493     // We assume VLENB is no more than 65536 / 8 bytes.
8494     Known.Zero.setBitsFrom(14);
8495     break;
8496   }
8497   case ISD::INTRINSIC_W_CHAIN:
8498   case ISD::INTRINSIC_WO_CHAIN: {
8499     unsigned IntNo =
8500         Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
8501     switch (IntNo) {
8502     default:
8503       // We can't do anything for most intrinsics.
8504       break;
8505     case Intrinsic::riscv_vsetvli:
8506     case Intrinsic::riscv_vsetvlimax:
8507     case Intrinsic::riscv_vsetvli_opt:
8508     case Intrinsic::riscv_vsetvlimax_opt:
8509       // Assume that VL output is positive and would fit in an int32_t.
8510       // TODO: VLEN might be capped at 16 bits in a future V spec update.
8511       if (BitWidth >= 32)
8512         Known.Zero.setBitsFrom(31);
8513       break;
8514     }
8515     break;
8516   }
8517   }
8518 }
8519 
8520 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
8521     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8522     unsigned Depth) const {
8523   switch (Op.getOpcode()) {
8524   default:
8525     break;
8526   case RISCVISD::SELECT_CC: {
8527     unsigned Tmp =
8528         DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
8529     if (Tmp == 1) return 1;  // Early out.
8530     unsigned Tmp2 =
8531         DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
8532     return std::min(Tmp, Tmp2);
8533   }
8534   case RISCVISD::SLLW:
8535   case RISCVISD::SRAW:
8536   case RISCVISD::SRLW:
8537   case RISCVISD::DIVW:
8538   case RISCVISD::DIVUW:
8539   case RISCVISD::REMUW:
8540   case RISCVISD::ROLW:
8541   case RISCVISD::RORW:
8542   case RISCVISD::GREVW:
8543   case RISCVISD::GORCW:
8544   case RISCVISD::FSLW:
8545   case RISCVISD::FSRW:
8546   case RISCVISD::SHFLW:
8547   case RISCVISD::UNSHFLW:
8548   case RISCVISD::BCOMPRESSW:
8549   case RISCVISD::BDECOMPRESSW:
8550   case RISCVISD::BFPW:
8551   case RISCVISD::FCVT_W_RV64:
8552   case RISCVISD::FCVT_WU_RV64:
8553   case RISCVISD::STRICT_FCVT_W_RV64:
8554   case RISCVISD::STRICT_FCVT_WU_RV64:
8555     // TODO: As the result is sign-extended, this is conservatively correct. A
8556     // more precise answer could be calculated for SRAW depending on known
8557     // bits in the shift amount.
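         // For example, SLLW produces the low 32 bits of the shift result
         // sign-extended to 64 bits, so bits 63..31 are all copies of bit 31.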
8558     return 33;
8559   case RISCVISD::SHFL:
8560   case RISCVISD::UNSHFL: {
8561     // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
8562     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
8563     // will stay within the upper 32 bits. If there were more than 32 sign bits
8564     // before, there will be at least 33 sign bits after.
8565     if (Op.getValueType() == MVT::i64 &&
8566         isa<ConstantSDNode>(Op.getOperand(1)) &&
8567         (Op.getConstantOperandVal(1) & 0x10) == 0) {
8568       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
8569       if (Tmp > 32)
8570         return 33;
8571     }
8572     break;
8573   }
8574   case RISCVISD::VMV_X_S: {
8575     // The number of sign bits of the scalar result is computed by obtaining the
8576     // element type of the input vector operand, subtracting its width from the
8577     // XLEN, and then adding one (sign bit within the element type). If the
8578     // element type is wider than XLen, the least-significant XLEN bits are
8579     // taken.
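         // For example, extracting the first element of a vector of i8 on RV64
         // gives 64 - 8 + 1 = 57 known sign bits.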
8580     unsigned XLen = Subtarget.getXLen();
8581     unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
8582     if (EltBits <= XLen)
8583       return XLen - EltBits + 1;
8584     break;
8585   }
8586   }
8587 
8588   return 1;
8589 }
8590 
8591 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
8592                                                   MachineBasicBlock *BB) {
8593   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
8594 
8595   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
8596   // Should the count have wrapped while it was being read, we need to try
8597   // again.
8598   // ...
8599   // read:
8600   // rdcycleh x3 # load high word of cycle
8601   // rdcycle  x2 # load low word of cycle
8602   // rdcycleh x4 # load high word of cycle
8603   // bne x3, x4, read # check if high word reads match, otherwise try again
8604   // ...
8605 
8606   MachineFunction &MF = *BB->getParent();
8607   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8608   MachineFunction::iterator It = ++BB->getIterator();
8609 
8610   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
8611   MF.insert(It, LoopMBB);
8612 
8613   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
8614   MF.insert(It, DoneMBB);
8615 
8616   // Transfer the remainder of BB and its successor edges to DoneMBB.
8617   DoneMBB->splice(DoneMBB->begin(), BB,
8618                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8619   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
8620 
8621   BB->addSuccessor(LoopMBB);
8622 
8623   MachineRegisterInfo &RegInfo = MF.getRegInfo();
8624   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
8625   Register LoReg = MI.getOperand(0).getReg();
8626   Register HiReg = MI.getOperand(1).getReg();
8627   DebugLoc DL = MI.getDebugLoc();
8628 
8629   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8630   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
8631       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
8632       .addReg(RISCV::X0);
8633   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
8634       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
8635       .addReg(RISCV::X0);
8636   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
8637       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
8638       .addReg(RISCV::X0);
8639 
8640   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
8641       .addReg(HiReg)
8642       .addReg(ReadAgainReg)
8643       .addMBB(LoopMBB);
8644 
8645   LoopMBB->addSuccessor(LoopMBB);
8646   LoopMBB->addSuccessor(DoneMBB);
8647 
8648   MI.eraseFromParent();
8649 
8650   return DoneMBB;
8651 }
8652 
8653 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
8654                                              MachineBasicBlock *BB) {
8655   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
8656 
8657   MachineFunction &MF = *BB->getParent();
8658   DebugLoc DL = MI.getDebugLoc();
8659   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
8660   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
8661   Register LoReg = MI.getOperand(0).getReg();
8662   Register HiReg = MI.getOperand(1).getReg();
8663   Register SrcReg = MI.getOperand(2).getReg();
8664   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
8665   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
8666 
8667   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
8668                           RI);
8669   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
8670   MachineMemOperand *MMOLo =
8671       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
8672   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
8673       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
8674   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
8675       .addFrameIndex(FI)
8676       .addImm(0)
8677       .addMemOperand(MMOLo);
8678   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
8679       .addFrameIndex(FI)
8680       .addImm(4)
8681       .addMemOperand(MMOHi);
8682   MI.eraseFromParent(); // The pseudo instruction is gone now.
8683   return BB;
8684 }
8685 
8686 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
8687                                                  MachineBasicBlock *BB) {
8688   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
8689          "Unexpected instruction");
8690 
8691   MachineFunction &MF = *BB->getParent();
8692   DebugLoc DL = MI.getDebugLoc();
8693   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
8694   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
8695   Register DstReg = MI.getOperand(0).getReg();
8696   Register LoReg = MI.getOperand(1).getReg();
8697   Register HiReg = MI.getOperand(2).getReg();
8698   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
8699   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
8700 
8701   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
8702   MachineMemOperand *MMOLo =
8703       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
8704   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
8705       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
8706   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
8707       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
8708       .addFrameIndex(FI)
8709       .addImm(0)
8710       .addMemOperand(MMOLo);
8711   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
8712       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
8713       .addFrameIndex(FI)
8714       .addImm(4)
8715       .addMemOperand(MMOHi);
8716   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
8717   MI.eraseFromParent(); // The pseudo instruction is gone now.
8718   return BB;
8719 }
8720 
8721 static bool isSelectPseudo(MachineInstr &MI) {
8722   switch (MI.getOpcode()) {
8723   default:
8724     return false;
8725   case RISCV::Select_GPR_Using_CC_GPR:
8726   case RISCV::Select_FPR16_Using_CC_GPR:
8727   case RISCV::Select_FPR32_Using_CC_GPR:
8728   case RISCV::Select_FPR64_Using_CC_GPR:
8729     return true;
8730   }
8731 }
8732 
8733 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
8734                                         unsigned RelOpcode, unsigned EqOpcode,
8735                                         const RISCVSubtarget &Subtarget) {
8736   DebugLoc DL = MI.getDebugLoc();
8737   Register DstReg = MI.getOperand(0).getReg();
8738   Register Src1Reg = MI.getOperand(1).getReg();
8739   Register Src2Reg = MI.getOperand(2).getReg();
8740   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8741   Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
8742   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
8743 
8744   // Save the current FFLAGS.
8745   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
8746 
8747   auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
8748                  .addReg(Src1Reg)
8749                  .addReg(Src2Reg);
8750   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
8751     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
8752 
8753   // Restore the FFLAGS.
8754   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
8755       .addReg(SavedFFlags, RegState::Kill);
8756 
8757   // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
8758   auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
8759                   .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
8760                   .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
8761   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
8762     MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
8763 
8764   // Erase the pseudoinstruction.
8765   MI.eraseFromParent();
8766   return BB;
8767 }
8768 
8769 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
8770                                            MachineBasicBlock *BB,
8771                                            const RISCVSubtarget &Subtarget) {
8772   // To "insert" Select_* instructions, we actually have to insert the triangle
8773   // control-flow pattern.  The incoming instructions know the destination vreg
8774   // to set, the condition code register to branch on, the true/false values to
8775   // select between, and the condcode to use to select the appropriate branch.
8776   //
8777   // We produce the following control flow:
8778   //     HeadMBB
8779   //     |  \
8780   //     |  IfFalseMBB
8781   //     | /
8782   //    TailMBB
8783   //
8784   // When we find a sequence of selects we attempt to optimize their emission
8785   // by sharing the control flow. Currently we only handle cases where we have
8786   // multiple selects with the exact same condition (same LHS, RHS and CC).
8787   // The selects may be interleaved with other instructions if the other
8788   // instructions meet some requirements we deem safe:
8789   // - They are debug instructions. Otherwise,
8790   // - They do not have side-effects, do not access memory and their inputs do
8791   //   not depend on the results of the select pseudo-instructions.
8792   // The TrueV/FalseV operands of the selects cannot depend on the result of
8793   // previous selects in the sequence.
8794   // These conditions could be further relaxed. See the X86 target for a
8795   // related approach and more information.
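  // As an illustrative sketch, a sequence such as
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t2, %f2
  // shares a single conditional branch in HeadMBB and becomes two PHIs in
  // TailMBB, rather than expanding into two separate triangles.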
8796   Register LHS = MI.getOperand(1).getReg();
8797   Register RHS = MI.getOperand(2).getReg();
8798   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
8799 
8800   SmallVector<MachineInstr *, 4> SelectDebugValues;
8801   SmallSet<Register, 4> SelectDests;
8802   SelectDests.insert(MI.getOperand(0).getReg());
8803 
8804   MachineInstr *LastSelectPseudo = &MI;
8805 
8806   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8807        SequenceMBBI != E; ++SequenceMBBI) {
8808     if (SequenceMBBI->isDebugInstr())
8809       continue;
8810     else if (isSelectPseudo(*SequenceMBBI)) {
8811       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8812           SequenceMBBI->getOperand(2).getReg() != RHS ||
8813           SequenceMBBI->getOperand(3).getImm() != CC ||
8814           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8815           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8816         break;
8817       LastSelectPseudo = &*SequenceMBBI;
8818       SequenceMBBI->collectDebugValues(SelectDebugValues);
8819       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8820     } else {
8821       if (SequenceMBBI->hasUnmodeledSideEffects() ||
8822           SequenceMBBI->mayLoadOrStore())
8823         break;
8824       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8825             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8826           }))
8827         break;
8828     }
8829   }
8830 
8831   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
8832   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8833   DebugLoc DL = MI.getDebugLoc();
8834   MachineFunction::iterator I = ++BB->getIterator();
8835 
8836   MachineBasicBlock *HeadMBB = BB;
8837   MachineFunction *F = BB->getParent();
8838   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8839   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8840 
8841   F->insert(I, IfFalseMBB);
8842   F->insert(I, TailMBB);
8843 
8844   // Transfer debug instructions associated with the selects to TailMBB.
8845   for (MachineInstr *DebugInstr : SelectDebugValues) {
8846     TailMBB->push_back(DebugInstr->removeFromParent());
8847   }
8848 
8849   // Move all instructions after the sequence to TailMBB.
8850   TailMBB->splice(TailMBB->end(), HeadMBB,
8851                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
8852   // Update machine-CFG edges by transferring all successors of the current
8853   // block to the new block which will contain the Phi nodes for the selects.
8854   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
8855   // Set the successors for HeadMBB.
8856   HeadMBB->addSuccessor(IfFalseMBB);
8857   HeadMBB->addSuccessor(TailMBB);
8858 
8859   // Insert appropriate branch.
8860   BuildMI(HeadMBB, DL, TII.getBrCond(CC))
8861     .addReg(LHS)
8862     .addReg(RHS)
8863     .addMBB(TailMBB);
8864 
8865   // IfFalseMBB just falls through to TailMBB.
8866   IfFalseMBB->addSuccessor(TailMBB);
8867 
8868   // Create PHIs for all of the select pseudo-instructions.
8869   auto SelectMBBI = MI.getIterator();
8870   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
8871   auto InsertionPoint = TailMBB->begin();
8872   while (SelectMBBI != SelectEnd) {
8873     auto Next = std::next(SelectMBBI);
8874     if (isSelectPseudo(*SelectMBBI)) {
8875       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
8876       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
8877               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
8878           .addReg(SelectMBBI->getOperand(4).getReg())
8879           .addMBB(HeadMBB)
8880           .addReg(SelectMBBI->getOperand(5).getReg())
8881           .addMBB(IfFalseMBB);
8882       SelectMBBI->eraseFromParent();
8883     }
8884     SelectMBBI = Next;
8885   }
8886 
8887   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8888   return TailMBB;
8889 }
8890 
8891 MachineBasicBlock *
8892 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
8893                                                  MachineBasicBlock *BB) const {
8894   switch (MI.getOpcode()) {
8895   default:
8896     llvm_unreachable("Unexpected instr type to insert");
8897   case RISCV::ReadCycleWide:
8898     assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
8900     return emitReadCycleWidePseudo(MI, BB);
8901   case RISCV::Select_GPR_Using_CC_GPR:
8902   case RISCV::Select_FPR16_Using_CC_GPR:
8903   case RISCV::Select_FPR32_Using_CC_GPR:
8904   case RISCV::Select_FPR64_Using_CC_GPR:
8905     return emitSelectPseudo(MI, BB, Subtarget);
8906   case RISCV::BuildPairF64Pseudo:
8907     return emitBuildPairF64Pseudo(MI, BB);
8908   case RISCV::SplitF64Pseudo:
8909     return emitSplitF64Pseudo(MI, BB);
8910   case RISCV::PseudoQuietFLE_H:
8911     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
8912   case RISCV::PseudoQuietFLT_H:
8913     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
8914   case RISCV::PseudoQuietFLE_S:
8915     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
8916   case RISCV::PseudoQuietFLT_S:
8917     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
8918   case RISCV::PseudoQuietFLE_D:
8919     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
8920   case RISCV::PseudoQuietFLT_D:
8921     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
8922   }
8923 }
8924 
8925 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
8926                                                         SDNode *Node) const {
8927   // Add FRM dependency to any instructions with dynamic rounding mode.
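  // For example, an FADD_S selected with the DYN rounding mode immediate gets
  // an implicit use of FRM appended here, so that later passes model its
  // dependence on writes to the frm CSR.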
8928   unsigned Opc = MI.getOpcode();
8929   auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
8930   if (Idx < 0)
8931     return;
8932   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
8933     return;
8934   // If the instruction already reads FRM, don't add another read.
8935   if (MI.readsRegister(RISCV::FRM))
8936     return;
8937   MI.addOperand(
8938       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
8939 }
8940 
8941 // Calling Convention Implementation.
8942 // The expectations for frontend ABI lowering vary from target to target.
8943 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
8944 // details, but this is a longer term goal. For now, we simply try to keep the
8945 // role of the frontend as simple and well-defined as possible. The rules can
8946 // be summarised as:
8947 // * Never split up large scalar arguments. We handle them here.
8948 // * If a hardfloat calling convention is being used, and the struct may be
8949 // passed in a pair of registers (fp+fp, int+fp), and both registers are
8950 // available, then pass as two separate arguments. If either the GPRs or FPRs
8951 // are exhausted, then pass according to the rule below.
8952 // * If a struct could never be passed in registers or directly in a stack
8953 // slot (as it is larger than 2*XLEN and the floating point rules don't
8954 // apply), then pass it using a pointer with the byval attribute.
// * If a struct is no larger than 2*XLEN, then coerce to either a two-element
8956 // word-sized array or a 2*XLEN scalar (depending on alignment).
8957 // * The frontend can determine whether a struct is returned by reference or
8958 // not based on its size and fields. If it will be returned by reference, the
8959 // frontend must modify the prototype so a pointer with the sret annotation is
8960 // passed as the first argument. This is not necessary for large scalar
8961 // returns.
8962 // * Struct return values and varargs should be coerced to structs containing
8963 // register-size fields in the same situations they would be for fixed
8964 // arguments.
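//
// As a rough illustration on RV32 (XLEN=32): a 16-byte struct of integers is
// larger than 2*XLEN and so is passed via a pointer as described above; an
// 8-byte struct is coerced to a two-element i32 array or an i64 depending on
// its alignment; and under a hard-double ABI a struct such as
// { double, int32_t } may be passed as two separate arguments while an FPR
// and a GPR are still available.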
8965 
8966 static const MCPhysReg ArgGPRs[] = {
8967   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
8968   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
8969 };
8970 static const MCPhysReg ArgFPR16s[] = {
8971   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
8972   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
8973 };
8974 static const MCPhysReg ArgFPR32s[] = {
8975   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
8976   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
8977 };
8978 static const MCPhysReg ArgFPR64s[] = {
8979   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
8980   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
8981 };
8982 // This is an interim calling convention and it may be changed in the future.
8983 static const MCPhysReg ArgVRs[] = {
8984     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
8985     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
8986     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
8987 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
8988                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
8989                                      RISCV::V20M2, RISCV::V22M2};
8990 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
8991                                      RISCV::V20M4};
8992 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
8993 
8994 // Pass a 2*XLEN argument that has been split into two XLEN values through
8995 // registers or the stack as necessary.
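// For example, an i64 argument on RV32 legalised into two i32 halves may end
// up in a pair of GPRs (e.g. a0 and a1), split between the last free GPR and
// the stack, or entirely on the stack, in which case the first half honours
// the original alignment of the argument.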
8996 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
8997                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
8998                                 MVT ValVT2, MVT LocVT2,
8999                                 ISD::ArgFlagsTy ArgFlags2) {
9000   unsigned XLenInBytes = XLen / 8;
9001   if (Register Reg = State.AllocateReg(ArgGPRs)) {
9002     // At least one half can be passed via register.
9003     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
9004                                      VA1.getLocVT(), CCValAssign::Full));
9005   } else {
9006     // Both halves must be passed on the stack, with proper alignment.
9007     Align StackAlign =
9008         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
9009     State.addLoc(
9010         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
9011                             State.AllocateStack(XLenInBytes, StackAlign),
9012                             VA1.getLocVT(), CCValAssign::Full));
9013     State.addLoc(CCValAssign::getMem(
9014         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
9015         LocVT2, CCValAssign::Full));
9016     return false;
9017   }
9018 
9019   if (Register Reg = State.AllocateReg(ArgGPRs)) {
9020     // The second half can also be passed via register.
9021     State.addLoc(
9022         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
9023   } else {
9024     // The second half is passed via the stack, without additional alignment.
9025     State.addLoc(CCValAssign::getMem(
9026         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
9027         LocVT2, CCValAssign::Full));
9028   }
9029 
9030   return false;
9031 }
9032 
9033 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
9034                                Optional<unsigned> FirstMaskArgument,
9035                                CCState &State, const RISCVTargetLowering &TLI) {
9036   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
9037   if (RC == &RISCV::VRRegClass) {
9038     // Assign the first mask argument to V0.
9039     // This is an interim calling convention and it may be changed in the
9040     // future.
9041     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
9042       return State.AllocateReg(RISCV::V0);
9043     return State.AllocateReg(ArgVRs);
9044   }
9045   if (RC == &RISCV::VRM2RegClass)
9046     return State.AllocateReg(ArgVRM2s);
9047   if (RC == &RISCV::VRM4RegClass)
9048     return State.AllocateReg(ArgVRM4s);
9049   if (RC == &RISCV::VRM8RegClass)
9050     return State.AllocateReg(ArgVRM8s);
9051   llvm_unreachable("Unhandled register class for ValueType");
9052 }
9053 
9054 // Implements the RISC-V calling convention. Returns true upon failure.
9055 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
9056                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
9057                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
9058                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
9059                      Optional<unsigned> FirstMaskArgument) {
9060   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
9061   assert(XLen == 32 || XLen == 64);
9062   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
9063 
  // Any return value split into more than two values can't be returned
9065   // directly. Vectors are returned via the available vector registers.
9066   if (!LocVT.isVector() && IsRet && ValNo > 1)
9067     return true;
9068 
  // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
  // passing a variadic argument, or if no F16/F32 argument registers are
  // available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are
  // available.
  bool UseGPRForF64 = true;
9075 
9076   switch (ABI) {
9077   default:
9078     llvm_unreachable("Unexpected ABI");
9079   case RISCVABI::ABI_ILP32:
9080   case RISCVABI::ABI_LP64:
9081     break;
9082   case RISCVABI::ABI_ILP32F:
9083   case RISCVABI::ABI_LP64F:
9084     UseGPRForF16_F32 = !IsFixed;
9085     break;
9086   case RISCVABI::ABI_ILP32D:
9087   case RISCVABI::ABI_LP64D:
9088     UseGPRForF16_F32 = !IsFixed;
9089     UseGPRForF64 = !IsFixed;
9090     break;
9091   }
9092 
9093   // FPR16, FPR32, and FPR64 alias each other.
9094   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
9095     UseGPRForF16_F32 = true;
9096     UseGPRForF64 = true;
9097   }
9098 
9099   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
9100   // similar local variables rather than directly checking against the target
9101   // ABI.
9102 
9103   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
9104     LocVT = XLenVT;
9105     LocInfo = CCValAssign::BCvt;
9106   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
9107     LocVT = MVT::i64;
9108     LocInfo = CCValAssign::BCvt;
9109   }
9110 
9111   // If this is a variadic argument, the RISC-V calling convention requires
9112   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
9113   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
9114   // be used regardless of whether the original argument was split during
9115   // legalisation or not. The argument will not be passed by registers if the
9116   // original type is larger than 2*XLEN, so the register alignment rule does
9117   // not apply.
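  // For instance, a variadic double passed on RV32 whose next free register
  // would be a1 skips a1 and is passed in the aligned pair a2/a3 instead.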
9118   unsigned TwoXLenInBytes = (2 * XLen) / 8;
9119   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
9120       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
9121     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
9122     // Skip 'odd' register if necessary.
9123     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
9124       State.AllocateReg(ArgGPRs);
9125   }
9126 
9127   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
9128   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
9129       State.getPendingArgFlags();
9130 
9131   assert(PendingLocs.size() == PendingArgFlags.size() &&
9132          "PendingLocs and PendingArgFlags out of sync");
9133 
9134   // Handle passing f64 on RV32D with a soft float ABI or when floating point
9135   // registers are exhausted.
9136   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
9137     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
9138            "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
9140     // GPRs, split between a GPR and the stack, or passed completely on the
9141     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
9142     // cases.
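    // For example, with a0 and a1 free the f64 travels in that GPR pair; with
    // only a7 free the low half goes in a7 and the high half in a 4-byte
    // stack slot; with no GPRs left the whole value takes an 8-byte,
    // 8-byte-aligned stack slot.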
9143     Register Reg = State.AllocateReg(ArgGPRs);
9144     LocVT = MVT::i32;
9145     if (!Reg) {
9146       unsigned StackOffset = State.AllocateStack(8, Align(8));
9147       State.addLoc(
9148           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9149       return false;
9150     }
9151     if (!State.AllocateReg(ArgGPRs))
9152       State.AllocateStack(4, Align(4));
9153     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9154     return false;
9155   }
9156 
9157   // Fixed-length vectors are located in the corresponding scalable-vector
9158   // container types.
9159   if (ValVT.isFixedLengthVector())
9160     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
9161 
9162   // Split arguments might be passed indirectly, so keep track of the pending
9163   // values. Split vectors are passed via a mix of registers and indirectly, so
9164   // treat them as we would any other argument.
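  // For example, an i128 argument on RV32 is split into four XLEN-sized parts
  // and is therefore passed indirectly (its address in a GPR or on the
  // stack), whereas an i64 on RV32 splits into only two parts and is handled
  // directly by CC_RISCVAssign2XLen below.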
9165   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9166     LocVT = XLenVT;
9167     LocInfo = CCValAssign::Indirect;
9168     PendingLocs.push_back(
9169         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9170     PendingArgFlags.push_back(ArgFlags);
9171     if (!ArgFlags.isSplitEnd()) {
9172       return false;
9173     }
9174   }
9175 
9176   // If the split argument only had two elements, it should be passed directly
9177   // in registers or on the stack.
9178   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9179       PendingLocs.size() <= 2) {
9180     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9181     // Apply the normal calling convention rules to the first half of the
9182     // split argument.
9183     CCValAssign VA = PendingLocs[0];
9184     ISD::ArgFlagsTy AF = PendingArgFlags[0];
9185     PendingLocs.clear();
9186     PendingArgFlags.clear();
9187     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
9188                                ArgFlags);
9189   }
9190 
9191   // Allocate to a register if possible, or else a stack slot.
9192   Register Reg;
9193   unsigned StoreSizeBytes = XLen / 8;
9194   Align StackAlign = Align(XLen / 8);
9195 
9196   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
9197     Reg = State.AllocateReg(ArgFPR16s);
9198   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
9199     Reg = State.AllocateReg(ArgFPR32s);
9200   else if (ValVT == MVT::f64 && !UseGPRForF64)
9201     Reg = State.AllocateReg(ArgFPR64s);
9202   else if (ValVT.isVector()) {
9203     Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
9204     if (!Reg) {
9205       // For return values, the vector must be passed fully via registers or
9206       // via the stack.
9207       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
9208       // but we're using all of them.
9209       if (IsRet)
9210         return true;
      // Try using a GPR to pass the address.
9212       if ((Reg = State.AllocateReg(ArgGPRs))) {
9213         LocVT = XLenVT;
9214         LocInfo = CCValAssign::Indirect;
9215       } else if (ValVT.isScalableVector()) {
9216         LocVT = XLenVT;
9217         LocInfo = CCValAssign::Indirect;
9218       } else {
9219         // Pass fixed-length vectors on the stack.
9220         LocVT = ValVT;
9221         StoreSizeBytes = ValVT.getStoreSize();
9222         // Align vectors to their element sizes, being careful for vXi1
9223         // vectors.
9224         StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
9225       }
9226     }
9227   } else {
9228     Reg = State.AllocateReg(ArgGPRs);
9229   }
9230 
9231   unsigned StackOffset =
9232       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9233 
9234   // If we reach this point and PendingLocs is non-empty, we must be at the
9235   // end of a split argument that must be passed indirectly.
9236   if (!PendingLocs.empty()) {
9237     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9238     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
9239 
9240     for (auto &It : PendingLocs) {
9241       if (Reg)
9242         It.convertToReg(Reg);
9243       else
9244         It.convertToMem(StackOffset);
9245       State.addLoc(It);
9246     }
9247     PendingLocs.clear();
9248     PendingArgFlags.clear();
9249     return false;
9250   }
9251 
9252   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
9253           (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
9254          "Expected an XLenVT or vector types at this stage");
9255 
9256   if (Reg) {
9257     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9258     return false;
9259   }
9260 
9261   // When a floating-point value is passed on the stack, no bit-conversion is
9262   // needed.
9263   if (ValVT.isFloatingPoint()) {
9264     LocVT = ValVT;
9265     LocInfo = CCValAssign::Full;
9266   }
9267   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9268   return false;
9269 }
9270 
9271 template <typename ArgTy>
9272 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
9273   for (const auto &ArgIdx : enumerate(Args)) {
9274     MVT ArgVT = ArgIdx.value().VT;
9275     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
9276       return ArgIdx.index();
9277   }
9278   return None;
9279 }
9280 
9281 void RISCVTargetLowering::analyzeInputArgs(
9282     MachineFunction &MF, CCState &CCInfo,
9283     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9284     RISCVCCAssignFn Fn) const {
9285   unsigned NumArgs = Ins.size();
9286   FunctionType *FType = MF.getFunction().getFunctionType();
9287 
9288   Optional<unsigned> FirstMaskArgument;
9289   if (Subtarget.hasVInstructions())
9290     FirstMaskArgument = preAssignMask(Ins);
9291 
9292   for (unsigned i = 0; i != NumArgs; ++i) {
9293     MVT ArgVT = Ins[i].VT;
9294     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
9295 
9296     Type *ArgTy = nullptr;
9297     if (IsRet)
9298       ArgTy = FType->getReturnType();
9299     else if (Ins[i].isOrigArg())
9300       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9301 
9302     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
9303     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
9304            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
9305            FirstMaskArgument)) {
9306       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
9307                         << EVT(ArgVT).getEVTString() << '\n');
9308       llvm_unreachable(nullptr);
9309     }
9310   }
9311 }
9312 
9313 void RISCVTargetLowering::analyzeOutputArgs(
9314     MachineFunction &MF, CCState &CCInfo,
9315     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9316     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
9317   unsigned NumArgs = Outs.size();
9318 
9319   Optional<unsigned> FirstMaskArgument;
9320   if (Subtarget.hasVInstructions())
9321     FirstMaskArgument = preAssignMask(Outs);
9322 
9323   for (unsigned i = 0; i != NumArgs; i++) {
9324     MVT ArgVT = Outs[i].VT;
9325     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
9326     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9327 
9328     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
9329     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
9330            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
9331            FirstMaskArgument)) {
9332       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
9333                         << EVT(ArgVT).getEVTString() << "\n");
9334       llvm_unreachable(nullptr);
9335     }
9336   }
9337 }
9338 
9339 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9340 // values.
9341 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
9342                                    const CCValAssign &VA, const SDLoc &DL,
9343                                    const RISCVSubtarget &Subtarget) {
9344   switch (VA.getLocInfo()) {
9345   default:
9346     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9347   case CCValAssign::Full:
9348     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
9349       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
9350     break;
9351   case CCValAssign::BCvt:
9352     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
9353       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
9354     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9355       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
9356     else
9357       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9358     break;
9359   }
9360   return Val;
9361 }
9362 
9363 // The caller is responsible for loading the full value if the argument is
9364 // passed with CCValAssign::Indirect.
9365 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
9366                                 const CCValAssign &VA, const SDLoc &DL,
9367                                 const RISCVTargetLowering &TLI) {
9368   MachineFunction &MF = DAG.getMachineFunction();
9369   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9370   EVT LocVT = VA.getLocVT();
9371   SDValue Val;
9372   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9373   Register VReg = RegInfo.createVirtualRegister(RC);
9374   RegInfo.addLiveIn(VA.getLocReg(), VReg);
9375   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9376 
9377   if (VA.getLocInfo() == CCValAssign::Indirect)
9378     return Val;
9379 
9380   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
9381 }
9382 
9383 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
9384                                    const CCValAssign &VA, const SDLoc &DL,
9385                                    const RISCVSubtarget &Subtarget) {
9386   EVT LocVT = VA.getLocVT();
9387 
9388   switch (VA.getLocInfo()) {
9389   default:
9390     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9391   case CCValAssign::Full:
9392     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
9393       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
9394     break;
9395   case CCValAssign::BCvt:
9396     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
9397       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
9398     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9399       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
9400     else
9401       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9402     break;
9403   }
9404   return Val;
9405 }
9406 
9407 // The caller is responsible for loading the full value if the argument is
9408 // passed with CCValAssign::Indirect.
9409 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
9410                                 const CCValAssign &VA, const SDLoc &DL) {
9411   MachineFunction &MF = DAG.getMachineFunction();
9412   MachineFrameInfo &MFI = MF.getFrameInfo();
9413   EVT LocVT = VA.getLocVT();
9414   EVT ValVT = VA.getValVT();
9415   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
9416   if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, the stack slot holds a pointer to
    // the vector rather than the vector itself, so treat ValVT as the pointer
    // (LocVT) type instead of the scalable vector type.
9420     ValVT = LocVT;
9421   }
9422   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9423                                  /*IsImmutable=*/true);
9424   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
9425   SDValue Val;
9426 
9427   ISD::LoadExtType ExtType;
9428   switch (VA.getLocInfo()) {
9429   default:
9430     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9431   case CCValAssign::Full:
9432   case CCValAssign::Indirect:
9433   case CCValAssign::BCvt:
9434     ExtType = ISD::NON_EXTLOAD;
9435     break;
9436   }
9437   Val = DAG.getExtLoad(
9438       ExtType, DL, LocVT, Chain, FIN,
9439       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
9440   return Val;
9441 }
9442 
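// Reassemble an f64 argument that was passed using the RV32 soft-double
// convention: either the whole value is loaded from the stack, or its i32
// halves (from a GPR pair, or from X17 plus a stack slot) are recombined with
// BuildPairF64.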
9443 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
9444                                        const CCValAssign &VA, const SDLoc &DL) {
9445   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9446          "Unexpected VA");
9447   MachineFunction &MF = DAG.getMachineFunction();
9448   MachineFrameInfo &MFI = MF.getFrameInfo();
9449   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9450 
9451   if (VA.isMemLoc()) {
9452     // f64 is passed on the stack.
9453     int FI =
9454         MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
9455     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9456     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
9457                        MachinePointerInfo::getFixedStack(MF, FI));
9458   }
9459 
9460   assert(VA.isRegLoc() && "Expected register VA assignment");
9461 
9462   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9463   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9464   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9465   SDValue Hi;
9466   if (VA.getLocReg() == RISCV::X17) {
9467     // Second half of f64 is passed on the stack.
9468     int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
9469     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9470     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9471                      MachinePointerInfo::getFixedStack(MF, FI));
9472   } else {
9473     // Second half of f64 is passed in another GPR.
9474     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9475     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
9476     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9477   }
9478   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
9479 }
9480 
// FastCC gives less than a 1% performance improvement on some particular
// benchmarks, but it may theoretically benefit other cases.
9483 static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
9484                             unsigned ValNo, MVT ValVT, MVT LocVT,
9485                             CCValAssign::LocInfo LocInfo,
9486                             ISD::ArgFlagsTy ArgFlags, CCState &State,
9487                             bool IsFixed, bool IsRet, Type *OrigTy,
9488                             const RISCVTargetLowering &TLI,
9489                             Optional<unsigned> FirstMaskArgument) {
9490 
9491   // X5 and X6 might be used for save-restore libcall.
9492   static const MCPhysReg GPRList[] = {
9493       RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
9494       RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
9495       RISCV::X29, RISCV::X30, RISCV::X31};
9496 
9497   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9498     if (unsigned Reg = State.AllocateReg(GPRList)) {
9499       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9500       return false;
9501     }
9502   }
9503 
9504   if (LocVT == MVT::f16) {
9505     static const MCPhysReg FPR16List[] = {
9506         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
9507         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
9508         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
9509         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
9510     if (unsigned Reg = State.AllocateReg(FPR16List)) {
9511       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9512       return false;
9513     }
9514   }
9515 
9516   if (LocVT == MVT::f32) {
9517     static const MCPhysReg FPR32List[] = {
9518         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
9519         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
9520         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
9521         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
9522     if (unsigned Reg = State.AllocateReg(FPR32List)) {
9523       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9524       return false;
9525     }
9526   }
9527 
9528   if (LocVT == MVT::f64) {
9529     static const MCPhysReg FPR64List[] = {
9530         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
9531         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
9532         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
9533         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
9534     if (unsigned Reg = State.AllocateReg(FPR64List)) {
9535       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9536       return false;
9537     }
9538   }
9539 
9540   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
9541     unsigned Offset4 = State.AllocateStack(4, Align(4));
9542     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
9543     return false;
9544   }
9545 
9546   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
9547     unsigned Offset5 = State.AllocateStack(8, Align(8));
9548     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
9549     return false;
9550   }
9551 
9552   if (LocVT.isVector()) {
9553     if (unsigned Reg =
9554             allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
9555       // Fixed-length vectors are located in the corresponding scalable-vector
9556       // container types.
9557       if (ValVT.isFixedLengthVector())
9558         LocVT = TLI.getContainerForFixedLengthVector(LocVT);
9559       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9560     } else {
      // Try to pass the address via a "fast" GPR.
9562       if (unsigned GPRReg = State.AllocateReg(GPRList)) {
9563         LocInfo = CCValAssign::Indirect;
9564         LocVT = TLI.getSubtarget().getXLenVT();
9565         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
9566       } else if (ValVT.isFixedLengthVector()) {
9567         auto StackAlign =
9568             MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
9569         unsigned StackOffset =
9570             State.AllocateStack(ValVT.getStoreSize(), StackAlign);
9571         State.addLoc(
9572             CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9573       } else {
9574         // Can't pass scalable vectors on the stack.
9575         return true;
9576       }
9577     }
9578 
9579     return false;
9580   }
9581 
9582   return true; // CC didn't match.
9583 }
9584 
9585 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9586                          CCValAssign::LocInfo LocInfo,
9587                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
9588 
9589   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9590     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
9591     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
9592     static const MCPhysReg GPRList[] = {
9593         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
9594         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
9595     if (unsigned Reg = State.AllocateReg(GPRList)) {
9596       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9597       return false;
9598     }
9599   }
9600 
9601   if (LocVT == MVT::f32) {
9602     // Pass in STG registers: F1, ..., F6
9603     //                        fs0 ... fs5
9604     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
9605                                           RISCV::F18_F, RISCV::F19_F,
9606                                           RISCV::F20_F, RISCV::F21_F};
9607     if (unsigned Reg = State.AllocateReg(FPR32List)) {
9608       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9609       return false;
9610     }
9611   }
9612 
9613   if (LocVT == MVT::f64) {
9614     // Pass in STG registers: D1, ..., D6
9615     //                        fs6 ... fs11
9616     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
9617                                           RISCV::F24_D, RISCV::F25_D,
9618                                           RISCV::F26_D, RISCV::F27_D};
9619     if (unsigned Reg = State.AllocateReg(FPR64List)) {
9620       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9621       return false;
9622     }
9623   }
9624 
9625   report_fatal_error("No registers left in GHC calling convention");
9626   return true;
9627 }
9628 
9629 // Transform physical registers into virtual registers.
9630 SDValue RISCVTargetLowering::LowerFormalArguments(
9631     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9632     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9633     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9634 
9635   MachineFunction &MF = DAG.getMachineFunction();
9636 
9637   switch (CallConv) {
9638   default:
9639     report_fatal_error("Unsupported calling convention");
9640   case CallingConv::C:
9641   case CallingConv::Fast:
9642     break;
9643   case CallingConv::GHC:
9644     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
9645         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
9646       report_fatal_error(
9647         "GHC calling convention requires the F and D instruction set extensions");
9648   }
9649 
9650   const Function &Func = MF.getFunction();
9651   if (Func.hasFnAttribute("interrupt")) {
9652     if (!Func.arg_empty())
9653       report_fatal_error(
9654         "Functions with the interrupt attribute cannot have arguments!");
9655 
9656     StringRef Kind =
9657       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
9658 
9659     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
9660       report_fatal_error(
9661         "Function interrupt attribute argument not supported!");
9662   }
9663 
9664   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9665   MVT XLenVT = Subtarget.getXLenVT();
9666   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
9668   std::vector<SDValue> OutChains;
9669 
9670   // Assign locations to all of the incoming arguments.
9671   SmallVector<CCValAssign, 16> ArgLocs;
9672   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9673 
9674   if (CallConv == CallingConv::GHC)
9675     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
9676   else
9677     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
9678                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
9679                                                    : CC_RISCV);
9680 
9681   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
9682     CCValAssign &VA = ArgLocs[i];
9683     SDValue ArgValue;
9684     // Passing f64 on RV32D with a soft float ABI must be handled as a special
9685     // case.
9686     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
9687       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
9688     else if (VA.isRegLoc())
9689       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
9690     else
9691       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9692 
9693     if (VA.getLocInfo() == CCValAssign::Indirect) {
9694       // If the original argument was split and passed by reference (e.g. i128
9695       // on RV32), we need to load all parts of it here (using the same
9696       // address). Vectors may be partly split to registers and partly to the
9697       // stack, in which case the base address is partly offset and subsequent
9698       // stores are relative to that.
9699       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9700                                    MachinePointerInfo()));
9701       unsigned ArgIndex = Ins[i].OrigArgIndex;
9702       unsigned ArgPartOffset = Ins[i].PartOffset;
9703       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
9704       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
9705         CCValAssign &PartVA = ArgLocs[i + 1];
9706         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
9707         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9708         if (PartVA.getValVT().isScalableVector())
9709           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
9710         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9711         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9712                                      MachinePointerInfo()));
9713         ++i;
9714       }
9715       continue;
9716     }
9717     InVals.push_back(ArgValue);
9718   }
9719 
9720   if (IsVarArg) {
9721     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
9722     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9723     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
9724     MachineFrameInfo &MFI = MF.getFrameInfo();
9725     MachineRegisterInfo &RegInfo = MF.getRegInfo();
9726     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
9727 
9728     // Offset of the first variable argument from stack pointer, and size of
9729     // the vararg save area. For now, the varargs save area is either zero or
9730     // large enough to hold a0-a7.
9731     int VaArgOffset, VarArgsSaveSize;
9732 
9733     // If all registers are allocated, then all varargs must be passed on the
9734     // stack and we don't need to save any argregs.
9735     if (ArgRegs.size() == Idx) {
9736       VaArgOffset = CCInfo.getNextStackOffset();
9737       VarArgsSaveSize = 0;
9738     } else {
9739       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
9740       VaArgOffset = -VarArgsSaveSize;
9741     }
9742 
    // Record the frame index of the first variable argument, which is
    // needed by VASTART.
9745     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
9746     RVFI->setVarArgsFrameIndex(FI);
9747 
    // If saving an odd number of registers, create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
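    // For instance, if a0-a2 were consumed by fixed arguments (Idx == 3),
    // a3-a7 are saved below and one extra slot keeps the save area a multiple
    // of 2*XLEN in size.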
9751     if (Idx % 2) {
9752       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
9753       VarArgsSaveSize += XLenInBytes;
9754     }
9755 
9756     // Copy the integer registers that may have been used for passing varargs
9757     // to the vararg save area.
9758     for (unsigned I = Idx; I < ArgRegs.size();
9759          ++I, VaArgOffset += XLenInBytes) {
9760       const Register Reg = RegInfo.createVirtualRegister(RC);
9761       RegInfo.addLiveIn(ArgRegs[I], Reg);
9762       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
9763       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
9764       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9765       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9766                                    MachinePointerInfo::getFixedStack(MF, FI));
9767       cast<StoreSDNode>(Store.getNode())
9768           ->getMemOperand()
9769           ->setValue((Value *)nullptr);
9770       OutChains.push_back(Store);
9771     }
9772     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
9773   }
9774 
9775   // All stores are grouped in one node to allow the matching between
9776   // the size of Ins and InVals. This only happens for vararg functions.
9777   if (!OutChains.empty()) {
9778     OutChains.push_back(Chain);
9779     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9780   }
9781 
9782   return Chain;
9783 }
9784 
9785 /// isEligibleForTailCallOptimization - Check whether the call is eligible
9786 /// for tail call optimization.
9787 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
9788 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
9789     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9790     const SmallVector<CCValAssign, 16> &ArgLocs) const {
9791 
9792   auto &Callee = CLI.Callee;
9793   auto CalleeCC = CLI.CallConv;
9794   auto &Outs = CLI.Outs;
9795   auto &Caller = MF.getFunction();
9796   auto CallerCC = Caller.getCallingConv();
9797 
9798   // Exception-handling functions need a special set of instructions to
9799   // indicate a return to the hardware. Tail-calling another function would
9800   // probably break this.
9801   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
9802   // should be expanded as new function attributes are introduced.
9803   if (Caller.hasFnAttribute("interrupt"))
9804     return false;
9805 
9806   // Do not tail call opt if the stack is used to pass parameters.
9807   if (CCInfo.getNextStackOffset() != 0)
9808     return false;
9809 
9810   // Do not tail call opt if any parameters need to be passed indirectly.
9811   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
9812   // passed indirectly. So the address of the value will be passed in a
9813   // register, or if not available, then the address is put on the stack. In
9814   // order to pass indirectly, space on the stack often needs to be allocated
9815   // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough, so we also need to check whether any of the
  // CCValAssigns in ArgLocs are CCValAssign::Indirect.
9818   for (auto &VA : ArgLocs)
9819     if (VA.getLocInfo() == CCValAssign::Indirect)
9820       return false;
9821 
9822   // Do not tail call opt if either caller or callee uses struct return
9823   // semantics.
9824   auto IsCallerStructRet = Caller.hasStructRetAttr();
9825   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9826   if (IsCallerStructRet || IsCalleeStructRet)
9827     return false;
9828 
9829   // Externally-defined functions with weak linkage should not be
9830   // tail-called. The behaviour of branch instructions in this situation (as
9831   // used for tail calls) is implementation-defined, so we cannot rely on the
9832   // linker replacing the tail call with a return.
9833   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
9834     const GlobalValue *GV = G->getGlobal();
9835     if (GV->hasExternalWeakLinkage())
9836       return false;
9837   }
9838 
9839   // The callee has to preserve all registers the caller needs to preserve.
9840   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9841   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9842   if (CalleeCC != CallerCC) {
9843     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9844     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9845       return false;
9846   }
9847 
9848   // Byval parameters hand the function a pointer directly into the stack area
9849   // we want to reuse during a tail call. Working around this *is* possible
9850   // but less efficient and uglier in LowerCall.
9851   for (auto &Arg : Outs)
9852     if (Arg.Flags.isByVal())
9853       return false;
9854 
9855   return true;
9856 }
9857 
9858 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
9859   return DAG.getDataLayout().getPrefTypeAlign(
9860       VT.getTypeForEVT(*DAG.getContext()));
9861 }
9862 
9863 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9864 // and output parameter nodes.
9865 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
9866                                        SmallVectorImpl<SDValue> &InVals) const {
9867   SelectionDAG &DAG = CLI.DAG;
9868   SDLoc &DL = CLI.DL;
9869   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
9870   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9871   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
9872   SDValue Chain = CLI.Chain;
9873   SDValue Callee = CLI.Callee;
9874   bool &IsTailCall = CLI.IsTailCall;
9875   CallingConv::ID CallConv = CLI.CallConv;
9876   bool IsVarArg = CLI.IsVarArg;
9877   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9878   MVT XLenVT = Subtarget.getXLenVT();
9879 
9880   MachineFunction &MF = DAG.getMachineFunction();
9881 
9882   // Analyze the operands of the call, assigning locations to each operand.
9883   SmallVector<CCValAssign, 16> ArgLocs;
9884   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9885 
9886   if (CallConv == CallingConv::GHC)
9887     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
9888   else
9889     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
9890                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
9891                                                     : CC_RISCV);
9892 
9893   // Check if it's really possible to do a tail call.
9894   if (IsTailCall)
9895     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9896 
9897   if (IsTailCall)
9898     ++NumTailCalls;
9899   else if (CLI.CB && CLI.CB->isMustTailCall())
9900     report_fatal_error("failed to perform tail call elimination on a call "
9901                        "site marked musttail");
9902 
9903   // Get a count of how many bytes are to be pushed on the stack.
9904   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
9905 
9906   // Create local copies for byval args
9907   SmallVector<SDValue, 8> ByValArgs;
9908   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9909     ISD::ArgFlagsTy Flags = Outs[i].Flags;
9910     if (!Flags.isByVal())
9911       continue;
9912 
9913     SDValue Arg = OutVals[i];
9914     unsigned Size = Flags.getByValSize();
9915     Align Alignment = Flags.getNonZeroByValAlign();
9916 
9917     int FI =
9918         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9919     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9920     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
9921 
9922     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9923                           /*IsVolatile=*/false,
9924                           /*AlwaysInline=*/false, IsTailCall,
9925                           MachinePointerInfo(), MachinePointerInfo());
9926     ByValArgs.push_back(FIPtr);
9927   }
9928 
9929   if (!IsTailCall)
9930     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9931 
9932   // Copy argument values to their designated locations.
9933   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
9934   SmallVector<SDValue, 8> MemOpChains;
9935   SDValue StackPtr;
9936   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
9937     CCValAssign &VA = ArgLocs[i];
9938     SDValue ArgValue = OutVals[i];
9939     ISD::ArgFlagsTy Flags = Outs[i].Flags;
9940 
9941     // Handle passing f64 on RV32D with a soft float ABI as a special case.
9942     bool IsF64OnRV32DSoftABI =
9943         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
9944     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
9945       SDValue SplitF64 = DAG.getNode(
9946           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9947       SDValue Lo = SplitF64.getValue(0);
9948       SDValue Hi = SplitF64.getValue(1);
9949 
9950       Register RegLo = VA.getLocReg();
9951       RegsToPass.push_back(std::make_pair(RegLo, Lo));
9952 
9953       if (RegLo == RISCV::X17) {
9954         // Second half of f64 is passed on the stack.
9955         // Work out the address of the stack slot.
9956         if (!StackPtr.getNode())
9957           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
9958         // Emit the store.
9959         MemOpChains.push_back(
9960             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
9961       } else {
9962         // Second half of f64 is passed in another GPR.
9963         assert(RegLo < RISCV::X31 && "Invalid register pair");
9964         Register RegHigh = RegLo + 1;
9965         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9966       }
9967       continue;
9968     }
9969 
9970     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
9971     // as any other MemLoc.
9972 
9973     // Promote the value if needed.
9974     // For now, only handle fully promoted and indirect arguments.
9975     if (VA.getLocInfo() == CCValAssign::Indirect) {
9976       // Store the argument in a stack slot and pass its address.
9977       Align StackAlign =
9978           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
9979                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
9980       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9981       // If the original argument was split (e.g. i128), we need
9982       // to store the required parts of it here (and pass just one address).
9983       // Vectors may be partly split to registers and partly to the stack, in
9984       // which case the base address is partly offset and subsequent stores are
9985       // relative to that.
9986       unsigned ArgIndex = Outs[i].OrigArgIndex;
9987       unsigned ArgPartOffset = Outs[i].PartOffset;
9988       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't know this up front, so
      // walk the remaining parts and collect their sizes and offsets.
9992       SmallVector<std::pair<SDValue, SDValue>> Parts;
9993       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
9994         SDValue PartValue = OutVals[i + 1];
9995         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
9996         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9997         EVT PartVT = PartValue.getValueType();
9998         if (PartVT.isScalableVector())
9999           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
10000         StoredSize += PartVT.getStoreSize();
10001         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
10002         Parts.push_back(std::make_pair(PartValue, Offset));
10003         ++i;
10004       }
10005       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
10006       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
10007       MemOpChains.push_back(
10008           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
10009                        MachinePointerInfo::getFixedStack(MF, FI)));
10010       for (const auto &Part : Parts) {
10011         SDValue PartValue = Part.first;
10012         SDValue PartOffset = Part.second;
10013         SDValue Address =
10014             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
10015         MemOpChains.push_back(
10016             DAG.getStore(Chain, DL, PartValue, Address,
10017                          MachinePointerInfo::getFixedStack(MF, FI)));
10018       }
10019       ArgValue = SpillSlot;
10020     } else {
10021       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
10022     }
10023 
10024     // Use local copy if it is a byval arg.
10025     if (Flags.isByVal())
10026       ArgValue = ByValArgs[j++];
10027 
10028     if (VA.isRegLoc()) {
10029       // Queue up the argument copies and emit them at the end.
10030       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
10031     } else {
10032       assert(VA.isMemLoc() && "Argument not register or memory");
10033       assert(!IsTailCall && "Tail call not allowed if stack is used "
10034                             "for passing parameters");
10035 
10036       // Work out the address of the stack slot.
10037       if (!StackPtr.getNode())
10038         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
10039       SDValue Address =
10040           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10041                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
10042 
10043       // Emit the store.
10044       MemOpChains.push_back(
10045           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
10046     }
10047   }
10048 
10049   // Join the stores, which are independent of one another.
10050   if (!MemOpChains.empty())
10051     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
10052 
10053   SDValue Glue;
10054 
10055   // Build a sequence of copy-to-reg nodes, chained and glued together.
10056   for (auto &Reg : RegsToPass) {
10057     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
10058     Glue = Chain.getValue(1);
10059   }
10060 
10061   // Validate that none of the argument registers have been marked as
10062   // reserved; if so, report an error. Do the same for the return address
10063   // register if this is not a tail call.
10064   validateCCReservedRegs(RegsToPass, MF);
10065   if (!IsTailCall &&
10066       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
10067     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
10068         MF.getFunction(),
10069         "Return address register required, but has been reserved."});
10070 
10071   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10072   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10073   // split it and then direct call can be matched by PseudoCALL.
10074   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
10075     const GlobalValue *GV = S->getGlobal();
10076 
10077     unsigned OpFlags = RISCVII::MO_CALL;
10078     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
10079       OpFlags = RISCVII::MO_PLT;
10080 
10081     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
10082   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
10083     unsigned OpFlags = RISCVII::MO_CALL;
10084 
10085     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
10086                                                  nullptr))
10087       OpFlags = RISCVII::MO_PLT;
10088 
10089     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
10090   }
10091 
10092   // The first call operand is the chain and the second is the target address.
10093   SmallVector<SDValue, 8> Ops;
10094   Ops.push_back(Chain);
10095   Ops.push_back(Callee);
10096 
10097   // Add argument registers to the end of the list so that they are
10098   // known live into the call.
10099   for (auto &Reg : RegsToPass)
10100     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
10101 
10102   if (!IsTailCall) {
10103     // Add a register mask operand representing the call-preserved registers.
10104     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10105     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10106     assert(Mask && "Missing call preserved mask for calling convention");
10107     Ops.push_back(DAG.getRegisterMask(Mask));
10108   }
10109 
10110   // Glue the call to the argument copies, if any.
10111   if (Glue.getNode())
10112     Ops.push_back(Glue);
10113 
10114   // Emit the call.
10115   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10116 
10117   if (IsTailCall) {
10118     MF.getFrameInfo().setHasTailCall();
10119     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
10120   }
10121 
10122   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
10123   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
10124   Glue = Chain.getValue(1);
10125 
10126   // Mark the end of the call, which is glued to the call itself.
10127   Chain = DAG.getCALLSEQ_END(Chain,
10128                              DAG.getConstant(NumBytes, DL, PtrVT, true),
10129                              DAG.getConstant(0, DL, PtrVT, true),
10130                              Glue, DL);
10131   Glue = Chain.getValue(1);
10132 
10133   // Assign locations to each value returned by this call.
10134   SmallVector<CCValAssign, 16> RVLocs;
10135   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10136   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
10137 
10138   // Copy all of the result registers out of their specified physreg.
10139   for (auto &VA : RVLocs) {
10140     // Copy the value out
10141     SDValue RetValue =
10142         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
10143     // Glue the RetValue to the end of the call sequence
10144     Chain = RetValue.getValue(1);
10145     Glue = RetValue.getValue(2);
10146 
10147     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10148       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
10149       SDValue RetValue2 =
10150           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
10151       Chain = RetValue2.getValue(1);
10152       Glue = RetValue2.getValue(2);
10153       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
10154                              RetValue2);
10155     }
10156 
10157     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
10158 
10159     InVals.push_back(RetValue);
10160   }
10161 
10162   return Chain;
10163 }
10164 
10165 bool RISCVTargetLowering::CanLowerReturn(
10166     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10167     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
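  // Check whether CC_RISCV can assign a location to every return value in
  // Outs; returning false here lets the generic lowering fall back to sret
  // demotion for returns that do not fit.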
10168   SmallVector<CCValAssign, 16> RVLocs;
10169   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10170 
10171   Optional<unsigned> FirstMaskArgument;
10172   if (Subtarget.hasVInstructions())
10173     FirstMaskArgument = preAssignMask(Outs);
10174 
10175   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10176     MVT VT = Outs[i].VT;
10177     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
10178     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
10179     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
10180                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
10181                  *this, FirstMaskArgument))
10182       return false;
10183   }
10184   return true;
10185 }
10186 
10187 SDValue
10188 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
10189                                  bool IsVarArg,
10190                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
10191                                  const SmallVectorImpl<SDValue> &OutVals,
10192                                  const SDLoc &DL, SelectionDAG &DAG) const {
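  // Assign each return value a location with CC_RISCV, copy the values into
  // the corresponding registers (splitting f64 across a GPR pair for the RV32
  // soft-float ABIs), and emit the RET_FLAG node (or its interrupt-return
  // variants) built at the end.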
10193   const MachineFunction &MF = DAG.getMachineFunction();
10194   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
10195 
10196   // Stores the assignment of the return value to a location.
10197   SmallVector<CCValAssign, 16> RVLocs;
10198 
10199   // Info about the registers and stack slot.
10200   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10201                  *DAG.getContext());
10202 
10203   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10204                     nullptr, CC_RISCV);
10205 
10206   if (CallConv == CallingConv::GHC && !RVLocs.empty())
10207     report_fatal_error("GHC functions return void only");
10208 
10209   SDValue Glue;
10210   SmallVector<SDValue, 4> RetOps(1, Chain);
10211 
10212   // Copy the result values into the output registers.
10213   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
10214     SDValue Val = OutVals[i];
10215     CCValAssign &VA = RVLocs[i];
10216     assert(VA.isRegLoc() && "Can only return in registers!");
10217 
10218     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10219       // Handle returning f64 on RV32D with a soft float ABI.
10220       assert(VA.isRegLoc() && "Expected return via registers");
10221       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
10222                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
10223       SDValue Lo = SplitF64.getValue(0);
10224       SDValue Hi = SplitF64.getValue(1);
10225       Register RegLo = VA.getLocReg();
10226       assert(RegLo < RISCV::X31 && "Invalid register pair");
10227       Register RegHi = RegLo + 1;
10228 
10229       if (STI.isRegisterReservedByUser(RegLo) ||
10230           STI.isRegisterReservedByUser(RegHi))
10231         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
10232             MF.getFunction(),
10233             "Return value register required, but has been reserved."});
10234 
10235       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10236       Glue = Chain.getValue(1);
10237       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10238       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10239       Glue = Chain.getValue(1);
10240       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10241     } else {
10242       // Handle a 'normal' return.
10243       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
10244       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10245 
10246       if (STI.isRegisterReservedByUser(VA.getLocReg()))
10247         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
10248             MF.getFunction(),
10249             "Return value register required, but has been reserved."});
10250 
10251       // Guarantee that all emitted copies are stuck together.
10252       Glue = Chain.getValue(1);
10253       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10254     }
10255   }
10256 
10257   RetOps[0] = Chain; // Update chain.
10258 
10259   // Add the glue node if we have it.
10260   if (Glue.getNode()) {
10261     RetOps.push_back(Glue);
10262   }
10263 
10264   unsigned RetOpc = RISCVISD::RET_FLAG;
10265   // Interrupt service routines use different return instructions.
10266   const Function &Func = DAG.getMachineFunction().getFunction();
10267   if (Func.hasFnAttribute("interrupt")) {
10268     if (!Func.getReturnType()->isVoidTy())
10269       report_fatal_error(
10270           "Functions with the interrupt attribute must have void return type!");
10271 
10272     MachineFunction &MF = DAG.getMachineFunction();
10273     StringRef Kind =
10274       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
10275 
10276     if (Kind == "user")
10277       RetOpc = RISCVISD::URET_FLAG;
10278     else if (Kind == "supervisor")
10279       RetOpc = RISCVISD::SRET_FLAG;
10280     else
10281       RetOpc = RISCVISD::MRET_FLAG;
10282   }
10283 
10284   return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
10285 }
10286 
10287 void RISCVTargetLowering::validateCCReservedRegs(
10288     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
10289     MachineFunction &MF) const {
10290   const Function &F = MF.getFunction();
10291   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
10292 
10293   if (llvm::any_of(Regs, [&STI](auto Reg) {
10294         return STI.isRegisterReservedByUser(Reg.first);
10295       }))
10296     F.getContext().diagnose(DiagnosticInfoUnsupported{
10297         F, "Argument register required, but has been reserved."});
10298 }
10299 
10300 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
10301   return CI->isTailCall();
10302 }
10303 
10304 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
10305 #define NODE_NAME_CASE(NODE)                                                   \
10306   case RISCVISD::NODE:                                                         \
10307     return "RISCVISD::" #NODE;
10308   // clang-format off
10309   switch ((RISCVISD::NodeType)Opcode) {
10310   case RISCVISD::FIRST_NUMBER:
10311     break;
10312   NODE_NAME_CASE(RET_FLAG)
10313   NODE_NAME_CASE(URET_FLAG)
10314   NODE_NAME_CASE(SRET_FLAG)
10315   NODE_NAME_CASE(MRET_FLAG)
10316   NODE_NAME_CASE(CALL)
10317   NODE_NAME_CASE(SELECT_CC)
10318   NODE_NAME_CASE(BR_CC)
10319   NODE_NAME_CASE(BuildPairF64)
10320   NODE_NAME_CASE(SplitF64)
10321   NODE_NAME_CASE(TAIL)
10322   NODE_NAME_CASE(MULHSU)
10323   NODE_NAME_CASE(SLLW)
10324   NODE_NAME_CASE(SRAW)
10325   NODE_NAME_CASE(SRLW)
10326   NODE_NAME_CASE(DIVW)
10327   NODE_NAME_CASE(DIVUW)
10328   NODE_NAME_CASE(REMUW)
10329   NODE_NAME_CASE(ROLW)
10330   NODE_NAME_CASE(RORW)
10331   NODE_NAME_CASE(CLZW)
10332   NODE_NAME_CASE(CTZW)
10333   NODE_NAME_CASE(FSLW)
10334   NODE_NAME_CASE(FSRW)
10335   NODE_NAME_CASE(FSL)
10336   NODE_NAME_CASE(FSR)
10337   NODE_NAME_CASE(FMV_H_X)
10338   NODE_NAME_CASE(FMV_X_ANYEXTH)
10339   NODE_NAME_CASE(FMV_W_X_RV64)
10340   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
10341   NODE_NAME_CASE(FCVT_X)
10342   NODE_NAME_CASE(FCVT_XU)
10343   NODE_NAME_CASE(FCVT_W_RV64)
10344   NODE_NAME_CASE(FCVT_WU_RV64)
10345   NODE_NAME_CASE(STRICT_FCVT_W_RV64)
10346   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
10347   NODE_NAME_CASE(READ_CYCLE_WIDE)
10348   NODE_NAME_CASE(GREV)
10349   NODE_NAME_CASE(GREVW)
10350   NODE_NAME_CASE(GORC)
10351   NODE_NAME_CASE(GORCW)
10352   NODE_NAME_CASE(SHFL)
10353   NODE_NAME_CASE(SHFLW)
10354   NODE_NAME_CASE(UNSHFL)
10355   NODE_NAME_CASE(UNSHFLW)
10356   NODE_NAME_CASE(BFP)
10357   NODE_NAME_CASE(BFPW)
10358   NODE_NAME_CASE(BCOMPRESS)
10359   NODE_NAME_CASE(BCOMPRESSW)
10360   NODE_NAME_CASE(BDECOMPRESS)
10361   NODE_NAME_CASE(BDECOMPRESSW)
10362   NODE_NAME_CASE(VMV_V_X_VL)
10363   NODE_NAME_CASE(VFMV_V_F_VL)
10364   NODE_NAME_CASE(VMV_X_S)
10365   NODE_NAME_CASE(VMV_S_X_VL)
10366   NODE_NAME_CASE(VFMV_S_F_VL)
10367   NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
10368   NODE_NAME_CASE(READ_VLENB)
10369   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
10370   NODE_NAME_CASE(VSLIDEUP_VL)
10371   NODE_NAME_CASE(VSLIDE1UP_VL)
10372   NODE_NAME_CASE(VSLIDEDOWN_VL)
10373   NODE_NAME_CASE(VSLIDE1DOWN_VL)
10374   NODE_NAME_CASE(VID_VL)
10375   NODE_NAME_CASE(VFNCVT_ROD_VL)
10376   NODE_NAME_CASE(VECREDUCE_ADD_VL)
10377   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
10378   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
10379   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
10380   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
10381   NODE_NAME_CASE(VECREDUCE_AND_VL)
10382   NODE_NAME_CASE(VECREDUCE_OR_VL)
10383   NODE_NAME_CASE(VECREDUCE_XOR_VL)
10384   NODE_NAME_CASE(VECREDUCE_FADD_VL)
10385   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
10386   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
10387   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
10388   NODE_NAME_CASE(ADD_VL)
10389   NODE_NAME_CASE(AND_VL)
10390   NODE_NAME_CASE(MUL_VL)
10391   NODE_NAME_CASE(OR_VL)
10392   NODE_NAME_CASE(SDIV_VL)
10393   NODE_NAME_CASE(SHL_VL)
10394   NODE_NAME_CASE(SREM_VL)
10395   NODE_NAME_CASE(SRA_VL)
10396   NODE_NAME_CASE(SRL_VL)
10397   NODE_NAME_CASE(SUB_VL)
10398   NODE_NAME_CASE(UDIV_VL)
10399   NODE_NAME_CASE(UREM_VL)
10400   NODE_NAME_CASE(XOR_VL)
10401   NODE_NAME_CASE(SADDSAT_VL)
10402   NODE_NAME_CASE(UADDSAT_VL)
10403   NODE_NAME_CASE(SSUBSAT_VL)
10404   NODE_NAME_CASE(USUBSAT_VL)
10405   NODE_NAME_CASE(FADD_VL)
10406   NODE_NAME_CASE(FSUB_VL)
10407   NODE_NAME_CASE(FMUL_VL)
10408   NODE_NAME_CASE(FDIV_VL)
10409   NODE_NAME_CASE(FNEG_VL)
10410   NODE_NAME_CASE(FABS_VL)
10411   NODE_NAME_CASE(FSQRT_VL)
10412   NODE_NAME_CASE(FMA_VL)
10413   NODE_NAME_CASE(FCOPYSIGN_VL)
10414   NODE_NAME_CASE(SMIN_VL)
10415   NODE_NAME_CASE(SMAX_VL)
10416   NODE_NAME_CASE(UMIN_VL)
10417   NODE_NAME_CASE(UMAX_VL)
10418   NODE_NAME_CASE(FMINNUM_VL)
10419   NODE_NAME_CASE(FMAXNUM_VL)
10420   NODE_NAME_CASE(MULHS_VL)
10421   NODE_NAME_CASE(MULHU_VL)
10422   NODE_NAME_CASE(FP_TO_SINT_VL)
10423   NODE_NAME_CASE(FP_TO_UINT_VL)
10424   NODE_NAME_CASE(SINT_TO_FP_VL)
10425   NODE_NAME_CASE(UINT_TO_FP_VL)
10426   NODE_NAME_CASE(FP_EXTEND_VL)
10427   NODE_NAME_CASE(FP_ROUND_VL)
10428   NODE_NAME_CASE(VWMUL_VL)
10429   NODE_NAME_CASE(VWMULU_VL)
10430   NODE_NAME_CASE(VWMULSU_VL)
10431   NODE_NAME_CASE(VWADD_VL)
10432   NODE_NAME_CASE(VWADDU_VL)
10433   NODE_NAME_CASE(VWSUB_VL)
10434   NODE_NAME_CASE(VWSUBU_VL)
10435   NODE_NAME_CASE(VWADD_W_VL)
10436   NODE_NAME_CASE(VWADDU_W_VL)
10437   NODE_NAME_CASE(VWSUB_W_VL)
10438   NODE_NAME_CASE(VWSUBU_W_VL)
10439   NODE_NAME_CASE(SETCC_VL)
10440   NODE_NAME_CASE(VSELECT_VL)
10441   NODE_NAME_CASE(VP_MERGE_VL)
10442   NODE_NAME_CASE(VMAND_VL)
10443   NODE_NAME_CASE(VMOR_VL)
10444   NODE_NAME_CASE(VMXOR_VL)
10445   NODE_NAME_CASE(VMCLR_VL)
10446   NODE_NAME_CASE(VMSET_VL)
10447   NODE_NAME_CASE(VRGATHER_VX_VL)
10448   NODE_NAME_CASE(VRGATHER_VV_VL)
10449   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
10450   NODE_NAME_CASE(VSEXT_VL)
10451   NODE_NAME_CASE(VZEXT_VL)
10452   NODE_NAME_CASE(VCPOP_VL)
10453   NODE_NAME_CASE(VLE_VL)
10454   NODE_NAME_CASE(VSE_VL)
10455   NODE_NAME_CASE(READ_CSR)
10456   NODE_NAME_CASE(WRITE_CSR)
10457   NODE_NAME_CASE(SWAP_CSR)
10458   }
10459   // clang-format on
10460   return nullptr;
10461 #undef NODE_NAME_CASE
10462 }
10463 
10464 /// getConstraintType - Given a constraint letter, return the type of
10465 /// constraint it is for this target.
10466 RISCVTargetLowering::ConstraintType
10467 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
10468   if (Constraint.size() == 1) {
10469     switch (Constraint[0]) {
10470     default:
10471       break;
10472     case 'f':
10473       return C_RegisterClass;
10474     case 'I':
10475     case 'J':
10476     case 'K':
10477       return C_Immediate;
10478     case 'A':
10479       return C_Memory;
10480     case 'S': // A symbolic address
10481       return C_Other;
10482     }
10483   } else {
10484     if (Constraint == "vr" || Constraint == "vm")
10485       return C_RegisterClass;
10486   }
10487   return TargetLowering::getConstraintType(Constraint);
10488 }
10489 
10490 std::pair<unsigned, const TargetRegisterClass *>
10491 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
10492                                                   StringRef Constraint,
10493                                                   MVT VT) const {
10494   // First, see if this is a constraint that directly corresponds to a
10495   // RISCV register class.
10496   if (Constraint.size() == 1) {
10497     switch (Constraint[0]) {
10498     case 'r':
10499       // TODO: Support fixed vectors up to XLen for P extension?
10500       if (VT.isVector())
10501         break;
10502       return std::make_pair(0U, &RISCV::GPRRegClass);
10503     case 'f':
10504       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
10505         return std::make_pair(0U, &RISCV::FPR16RegClass);
10506       if (Subtarget.hasStdExtF() && VT == MVT::f32)
10507         return std::make_pair(0U, &RISCV::FPR32RegClass);
10508       if (Subtarget.hasStdExtD() && VT == MVT::f64)
10509         return std::make_pair(0U, &RISCV::FPR64RegClass);
10510       break;
10511     default:
10512       break;
10513     }
10514   } else if (Constraint == "vr") {
10515     for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
10516                            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
10517       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
10518         return std::make_pair(0U, RC);
10519     }
10520   } else if (Constraint == "vm") {
10521     if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
10522       return std::make_pair(0U, &RISCV::VMV0RegClass);
10523   }
10524 
10525   // Clang will correctly decode the usage of register name aliases into their
10526   // official names. However, other frontends like `rustc` do not. This allows
10527   // users of these frontends to use the ABI names for registers in LLVM-style
10528   // register constraints.
10529   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
10530                                .Case("{zero}", RISCV::X0)
10531                                .Case("{ra}", RISCV::X1)
10532                                .Case("{sp}", RISCV::X2)
10533                                .Case("{gp}", RISCV::X3)
10534                                .Case("{tp}", RISCV::X4)
10535                                .Case("{t0}", RISCV::X5)
10536                                .Case("{t1}", RISCV::X6)
10537                                .Case("{t2}", RISCV::X7)
10538                                .Cases("{s0}", "{fp}", RISCV::X8)
10539                                .Case("{s1}", RISCV::X9)
10540                                .Case("{a0}", RISCV::X10)
10541                                .Case("{a1}", RISCV::X11)
10542                                .Case("{a2}", RISCV::X12)
10543                                .Case("{a3}", RISCV::X13)
10544                                .Case("{a4}", RISCV::X14)
10545                                .Case("{a5}", RISCV::X15)
10546                                .Case("{a6}", RISCV::X16)
10547                                .Case("{a7}", RISCV::X17)
10548                                .Case("{s2}", RISCV::X18)
10549                                .Case("{s3}", RISCV::X19)
10550                                .Case("{s4}", RISCV::X20)
10551                                .Case("{s5}", RISCV::X21)
10552                                .Case("{s6}", RISCV::X22)
10553                                .Case("{s7}", RISCV::X23)
10554                                .Case("{s8}", RISCV::X24)
10555                                .Case("{s9}", RISCV::X25)
10556                                .Case("{s10}", RISCV::X26)
10557                                .Case("{s11}", RISCV::X27)
10558                                .Case("{t3}", RISCV::X28)
10559                                .Case("{t4}", RISCV::X29)
10560                                .Case("{t5}", RISCV::X30)
10561                                .Case("{t6}", RISCV::X31)
10562                                .Default(RISCV::NoRegister);
10563   if (XRegFromAlias != RISCV::NoRegister)
10564     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
10565 
10566   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
10567   // TableGen record rather than the AsmName to choose registers for InlineAsm
10568   // constraints, and because we want to match those names to the widest
10569   // floating point register type available, manually select them here.
10570   //
10571   // The second case is the ABI name of the register, so that frontends can also
10572   // use the ABI names in register constraint lists.
10573   if (Subtarget.hasStdExtF()) {
10574     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
10575                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
10576                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
10577                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
10578                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
10579                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
10580                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
10581                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
10582                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
10583                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
10584                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
10585                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
10586                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
10587                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
10588                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
10589                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
10590                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
10591                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
10592                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
10593                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
10594                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
10595                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
10596                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
10597                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
10598                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
10599                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
10600                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
10601                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
10602                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
10603                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
10604                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
10605                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
10606                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
10607                         .Default(RISCV::NoRegister);
10608     if (FReg != RISCV::NoRegister) {
10609       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
10610       if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
10611         unsigned RegNo = FReg - RISCV::F0_F;
10612         unsigned DReg = RISCV::F0_D + RegNo;
10613         return std::make_pair(DReg, &RISCV::FPR64RegClass);
10614       }
10615       if (VT == MVT::f32 || VT == MVT::Other)
10616         return std::make_pair(FReg, &RISCV::FPR32RegClass);
10617       if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
10618         unsigned RegNo = FReg - RISCV::F0_F;
10619         unsigned HReg = RISCV::F0_H + RegNo;
10620         return std::make_pair(HReg, &RISCV::FPR16RegClass);
10621       }
10622     }
10623   }
10624 
10625   if (Subtarget.hasVInstructions()) {
10626     Register VReg = StringSwitch<Register>(Constraint.lower())
10627                         .Case("{v0}", RISCV::V0)
10628                         .Case("{v1}", RISCV::V1)
10629                         .Case("{v2}", RISCV::V2)
10630                         .Case("{v3}", RISCV::V3)
10631                         .Case("{v4}", RISCV::V4)
10632                         .Case("{v5}", RISCV::V5)
10633                         .Case("{v6}", RISCV::V6)
10634                         .Case("{v7}", RISCV::V7)
10635                         .Case("{v8}", RISCV::V8)
10636                         .Case("{v9}", RISCV::V9)
10637                         .Case("{v10}", RISCV::V10)
10638                         .Case("{v11}", RISCV::V11)
10639                         .Case("{v12}", RISCV::V12)
10640                         .Case("{v13}", RISCV::V13)
10641                         .Case("{v14}", RISCV::V14)
10642                         .Case("{v15}", RISCV::V15)
10643                         .Case("{v16}", RISCV::V16)
10644                         .Case("{v17}", RISCV::V17)
10645                         .Case("{v18}", RISCV::V18)
10646                         .Case("{v19}", RISCV::V19)
10647                         .Case("{v20}", RISCV::V20)
10648                         .Case("{v21}", RISCV::V21)
10649                         .Case("{v22}", RISCV::V22)
10650                         .Case("{v23}", RISCV::V23)
10651                         .Case("{v24}", RISCV::V24)
10652                         .Case("{v25}", RISCV::V25)
10653                         .Case("{v26}", RISCV::V26)
10654                         .Case("{v27}", RISCV::V27)
10655                         .Case("{v28}", RISCV::V28)
10656                         .Case("{v29}", RISCV::V29)
10657                         .Case("{v30}", RISCV::V30)
10658                         .Case("{v31}", RISCV::V31)
10659                         .Default(RISCV::NoRegister);
10660     if (VReg != RISCV::NoRegister) {
10661       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
10662         return std::make_pair(VReg, &RISCV::VMRegClass);
10663       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
10664         return std::make_pair(VReg, &RISCV::VRRegClass);
10665       for (const auto *RC :
10666            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
10667         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
10668           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
10669           return std::make_pair(VReg, RC);
10670         }
10671       }
10672     }
10673   }
10674 
10675   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10676 }
10677 
10678 unsigned
10679 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
10680   // Currently only support length 1 constraints.
10681   if (ConstraintCode.size() == 1) {
10682     switch (ConstraintCode[0]) {
10683     case 'A':
10684       return InlineAsm::Constraint_A;
10685     default:
10686       break;
10687     }
10688   }
10689 
10690   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
10691 }
10692 
10693 void RISCVTargetLowering::LowerAsmOperandForConstraint(
10694     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
10695     SelectionDAG &DAG) const {
10696   // Currently only support length 1 constraints.
10697   if (Constraint.length() == 1) {
10698     switch (Constraint[0]) {
10699     case 'I':
10700       // Validate & create a 12-bit signed immediate operand.
10701       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10702         uint64_t CVal = C->getSExtValue();
10703         if (isInt<12>(CVal))
10704           Ops.push_back(
10705               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10706       }
10707       return;
10708     case 'J':
10709       // Validate & create an integer zero operand.
10710       if (auto *C = dyn_cast<ConstantSDNode>(Op))
10711         if (C->getZExtValue() == 0)
10712           Ops.push_back(
10713               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
10714       return;
10715     case 'K':
10716       // Validate & create a 5-bit unsigned immediate operand.
10717       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10718         uint64_t CVal = C->getZExtValue();
10719         if (isUInt<5>(CVal))
10720           Ops.push_back(
10721               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10722       }
10723       return;
10724     case 'S':
10725       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
10726         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
10727                                                  GA->getValueType(0)));
10728       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
10729         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
10730                                                 BA->getValueType(0)));
10731       }
10732       return;
10733     default:
10734       break;
10735     }
10736   }
10737   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10738 }
10739 
10740 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
10741                                                    Instruction *Inst,
10742                                                    AtomicOrdering Ord) const {
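  // When fences are used to implement the atomic orderings, a seq_cst load
  // needs a leading seq_cst fence and a release (or stronger) store needs a
  // leading release fence; no other leading fences are required.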
10743   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
10744     return Builder.CreateFence(Ord);
10745   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
10746     return Builder.CreateFence(AtomicOrdering::Release);
10747   return nullptr;
10748 }
10749 
10750 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
10751                                                     Instruction *Inst,
10752                                                     AtomicOrdering Ord) const {
10753   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
10754     return Builder.CreateFence(AtomicOrdering::Acquire);
10755   return nullptr;
10756 }
10757 
10758 TargetLowering::AtomicExpansionKind
10759 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
10760   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
10761   // point operations can't be used in an lr/sc sequence without breaking the
10762   // forward-progress guarantee.
10763   if (AI->isFloatingPointOperation())
10764     return AtomicExpansionKind::CmpXChg;
10765 
10766   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10767   if (Size == 8 || Size == 16)
10768     return AtomicExpansionKind::MaskedIntrinsic;
10769   return AtomicExpansionKind::None;
10770 }
10771 
10772 static Intrinsic::ID
10773 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
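  // Map the atomicrmw operation to the matching riscv.masked.atomicrmw.*
  // intrinsic for this XLEN; these intrinsics are later expanded into an
  // LR/SC loop over the aligned word that contains the i8/i16 element.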
10774   if (XLen == 32) {
10775     switch (BinOp) {
10776     default:
10777       llvm_unreachable("Unexpected AtomicRMW BinOp");
10778     case AtomicRMWInst::Xchg:
10779       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
10780     case AtomicRMWInst::Add:
10781       return Intrinsic::riscv_masked_atomicrmw_add_i32;
10782     case AtomicRMWInst::Sub:
10783       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
10784     case AtomicRMWInst::Nand:
10785       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
10786     case AtomicRMWInst::Max:
10787       return Intrinsic::riscv_masked_atomicrmw_max_i32;
10788     case AtomicRMWInst::Min:
10789       return Intrinsic::riscv_masked_atomicrmw_min_i32;
10790     case AtomicRMWInst::UMax:
10791       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
10792     case AtomicRMWInst::UMin:
10793       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
10794     }
10795   }
10796 
10797   if (XLen == 64) {
10798     switch (BinOp) {
10799     default:
10800       llvm_unreachable("Unexpected AtomicRMW BinOp");
10801     case AtomicRMWInst::Xchg:
10802       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
10803     case AtomicRMWInst::Add:
10804       return Intrinsic::riscv_masked_atomicrmw_add_i64;
10805     case AtomicRMWInst::Sub:
10806       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
10807     case AtomicRMWInst::Nand:
10808       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
10809     case AtomicRMWInst::Max:
10810       return Intrinsic::riscv_masked_atomicrmw_max_i64;
10811     case AtomicRMWInst::Min:
10812       return Intrinsic::riscv_masked_atomicrmw_min_i64;
10813     case AtomicRMWInst::UMax:
10814       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
10815     case AtomicRMWInst::UMin:
10816       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
10817     }
10818   }
10819 
10820   llvm_unreachable("Unexpected XLen");
10821 }
10822 
10823 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
10824     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10825     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10826   unsigned XLen = Subtarget.getXLen();
10827   Value *Ordering =
10828       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
10829   Type *Tys[] = {AlignedAddr->getType()};
10830   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
10831       AI->getModule(),
10832       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
10833 
10834   if (XLen == 64) {
10835     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10836     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10837     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10838   }
10839 
10840   Value *Result;
10841 
10842   // Must pass the shift amount needed to sign extend the loaded value prior
10843   // to performing a signed comparison for min/max. ShiftAmt is the number of
10844   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
10845   // is the number of bits to left+right shift the value in order to
10846   // sign-extend.
10847   if (AI->getOperation() == AtomicRMWInst::Min ||
10848       AI->getOperation() == AtomicRMWInst::Max) {
10849     const DataLayout &DL = AI->getModule()->getDataLayout();
10850     unsigned ValWidth =
10851         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10852     Value *SextShamt =
10853         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
10854     Result = Builder.CreateCall(LrwOpScwLoop,
10855                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10856   } else {
10857     Result =
10858         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10859   }
10860 
10861   if (XLen == 64)
10862     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10863   return Result;
10864 }
10865 
10866 TargetLowering::AtomicExpansionKind
10867 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
10868     AtomicCmpXchgInst *CI) const {
10869   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
10870   if (Size == 8 || Size == 16)
10871     return AtomicExpansionKind::MaskedIntrinsic;
10872   return AtomicExpansionKind::None;
10873 }
10874 
10875 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
10876     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10877     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10878   unsigned XLen = Subtarget.getXLen();
10879   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
10880   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
10881   if (XLen == 64) {
10882     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10883     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10884     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10885     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
10886   }
10887   Type *Tys[] = {AlignedAddr->getType()};
10888   Function *MaskedCmpXchg =
10889       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
10890   Value *Result = Builder.CreateCall(
10891       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
10892   if (XLen == 64)
10893     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10894   return Result;
10895 }
10896 
10897 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
10898   return false;
10899 }
10900 
10901 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
10902                                                EVT VT) const {
10903   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
10904     return false;
10905 
10906   switch (FPVT.getSimpleVT().SimpleTy) {
10907   case MVT::f16:
10908     return Subtarget.hasStdExtZfh();
10909   case MVT::f32:
10910     return Subtarget.hasStdExtF();
10911   case MVT::f64:
10912     return Subtarget.hasStdExtD();
10913   default:
10914     return false;
10915   }
10916 }
10917 
10918 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
10919   // If we are using the small code model, we can reduce the size of jump
10920   // table entries to 4 bytes.
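  // Under the small code model, code and jump targets all lie within a 32-bit
  // signed range, so an absolute 32-bit entry (EK_Custom32) is sufficient.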
10921   if (Subtarget.is64Bit() && !isPositionIndependent() &&
10922       getTargetMachine().getCodeModel() == CodeModel::Small) {
10923     return MachineJumpTableInfo::EK_Custom32;
10924   }
10925   return TargetLowering::getJumpTableEncoding();
10926 }
10927 
10928 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
10929     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
10930     unsigned uid, MCContext &Ctx) const {
10931   assert(Subtarget.is64Bit() && !isPositionIndependent() &&
10932          getTargetMachine().getCodeModel() == CodeModel::Small);
10933   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
10934 }
10935 
10936 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
10937                                                      EVT VT) const {
10938   VT = VT.getScalarType();
10939 
10940   if (!VT.isSimple())
10941     return false;
10942 
10943   switch (VT.getSimpleVT().SimpleTy) {
10944   case MVT::f16:
10945     return Subtarget.hasStdExtZfh();
10946   case MVT::f32:
10947     return Subtarget.hasStdExtF();
10948   case MVT::f64:
10949     return Subtarget.hasStdExtD();
10950   default:
10951     break;
10952   }
10953 
10954   return false;
10955 }
10956 
10957 Register RISCVTargetLowering::getExceptionPointerRegister(
10958     const Constant *PersonalityFn) const {
10959   return RISCV::X10;
10960 }
10961 
10962 Register RISCVTargetLowering::getExceptionSelectorRegister(
10963     const Constant *PersonalityFn) const {
10964   return RISCV::X11;
10965 }
10966 
10967 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
10968   // Return false to suppress unnecessary extensions when a libcall argument
10969   // or return value has f32 type under the LP64 ABI.
10970   RISCVABI::ABI ABI = Subtarget.getTargetABI();
10971   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
10972     return false;
10973 
10974   return true;
10975 }
10976 
10977 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
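  // The 64-bit calling convention sign-extends 32-bit scalars to 64 bits
  // regardless of signedness, so i32 libcall arguments on RV64 must always be
  // sign-extended.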
10978   if (Subtarget.is64Bit() && Type == MVT::i32)
10979     return true;
10980 
10981   return IsSigned;
10982 }
10983 
10984 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
10985                                                  SDValue C) const {
10986   // Check integral scalar types.
10987   if (VT.isScalarInteger()) {
10988     // Omit the optimization if the subtarget has the M extension and the data
10989     // size exceeds XLen.
10990     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
10991       return false;
10992     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10993       // Break the MUL to a SLLI and an ADD/SUB.
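      // e.g. x * 9 -> (x << 3) + x, x * 7 -> (x << 3) - x,
      //      x * -7 -> x - (x << 3).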
10994       const APInt &Imm = ConstNode->getAPIntValue();
10995       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10996           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10997         return true;
10998       // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
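      // e.g. with Zba, x * 4100 can become sh2add(x, x << 12) because
      // 4100 - 4 is a power of 2.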
10999       if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
11000           ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11001            (Imm - 8).isPowerOf2()))
11002         return true;
11003       // Omit the following optimization if the subtarget has the M extension
11004       // and the data size >= XLen.
11005       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
11006         return false;
11007       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
11008       // a pair of LUI/ADDI.
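      // e.g. x * 2304 (2304 = 9 << 8 would need LUI+ADDI) can instead become
      // ((x << 3) + x) << 8.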
11009       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
11010         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
11011         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
11012             (1 - ImmS).isPowerOf2())
11013           return true;
11014       }
11015     }
11016   }
11017 
11018   return false;
11019 }
11020 
11021 bool RISCVTargetLowering::isMulAddWithConstProfitable(
11022     const SDValue &AddNode, const SDValue &ConstNode) const {
11023   // Let the DAGCombiner decide for vectors.
11024   EVT VT = AddNode.getValueType();
11025   if (VT.isVector())
11026     return true;
11027 
11028   // Let the DAGCombiner decide for larger types.
11029   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
11030     return true;
11031 
11032   // The fold is not profitable if c1 is simm12 while c1 * c2 is not.
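  // e.g. folding (mul (add x, 1), 4096) into (add (mul x, 4096), 4096) would
  // replace an ADDI immediate with a constant that has to be materialized
  // separately.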
11033   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
11034   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
11035   const APInt &C1 = C1Node->getAPIntValue();
11036   const APInt &C2 = C2Node->getAPIntValue();
11037   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
11038     return false;
11039 
11040   // Default to true and let the DAGCombiner decide.
11041   return true;
11042 }
11043 
11044 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
11045     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
11046     bool *Fast) const {
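  // Only vector accesses may be misaligned, and only while the alignment
  // still covers the element type (i.e. the access is element-aligned); such
  // accesses are also reported as fast.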
11047   if (!VT.isVector())
11048     return false;
11049 
11050   EVT ElemVT = VT.getVectorElementType();
11051   if (Alignment >= ElemVT.getStoreSize()) {
11052     if (Fast)
11053       *Fast = true;
11054     return true;
11055   }
11056 
11057   return false;
11058 }
11059 
11060 bool RISCVTargetLowering::splitValueIntoRegisterParts(
11061     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
11062     unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
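  // Handle the RISC-V specific ways a value is split into ABI parts:
  // NaN-boxing an f16 into an f32 part, and widening a scalable vector into a
  // larger scalable-vector part by inserting it as a subvector. Returning
  // false defers everything else to the generic splitting code.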
11063   bool IsABIRegCopy = CC.hasValue();
11064   EVT ValueVT = Val.getValueType();
11065   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
11066     // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
11067     // and cast to f32.
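    // This NaN-boxes the f16 value in the f32 register, matching how narrower
    // FP values are expected to be held in wider FP registers.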
11068     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11069     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11070     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11071                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11072     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11073     Parts[0] = Val;
11074     return true;
11075   }
11076 
11077   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
11078     LLVMContext &Context = *DAG.getContext();
11079     EVT ValueEltVT = ValueVT.getVectorElementType();
11080     EVT PartEltVT = PartVT.getVectorElementType();
11081     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
11082     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
11083     if (PartVTBitSize % ValueVTBitSize == 0) {
11084       assert(PartVTBitSize >= ValueVTBitSize);
11085       // If the element types are different, first widen to a vector that
11086       // has PartVT's size but the value's element type, then bitcast to
11087       // PartVT. For example, to copy a <vscale x 1 x i8> value into
11088       // <vscale x 4 x i16>, insert the value into <vscale x 8 x i8> as a
11089       // subvector (so the total size matches PartVT) and then bitcast the
11090       // result to <vscale x 4 x i16>.
11091       if (ValueEltVT != PartEltVT) {
11092         if (PartVTBitSize > ValueVTBitSize) {
11093           unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
11094           assert(Count != 0 && "The number of elements should not be zero.");
11095           EVT SameEltTypeVT =
11096               EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
11097           Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
11098                             DAG.getUNDEF(SameEltTypeVT), Val,
11099                             DAG.getVectorIdxConstant(0, DL));
11100         }
11101         Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
11102       } else {
11103         Val =
11104             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
11105                         Val, DAG.getVectorIdxConstant(0, DL));
11106       }
11107       Parts[0] = Val;
11108       return true;
11109     }
11110   }
11111   return false;
11112 }
11113 
11114 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
11115     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11116     MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
11117   bool IsABIRegCopy = CC.hasValue();
11118   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
11119     SDValue Val = Parts[0];
11120 
11121     // Cast the f32 to i32, truncate to i16, and cast back to f16.
11122     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11123     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11124     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
11125     return Val;
11126   }
11127 
11128   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
11129     LLVMContext &Context = *DAG.getContext();
11130     SDValue Val = Parts[0];
11131     EVT ValueEltVT = ValueVT.getVectorElementType();
11132     EVT PartEltVT = PartVT.getVectorElementType();
11133     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
11134     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
11135     if (PartVTBitSize % ValueVTBitSize == 0) {
11136       assert(PartVTBitSize >= ValueVTBitSize);
11137       EVT SameEltTypeVT = ValueVT;
11138       // If the element types are different, first bitcast to a vector with
11139       // the same total size as PartVT but the value's element type.
11140       // For example, to copy a <vscale x 1 x i8> value out of
11141       // <vscale x 4 x i16>, bitcast the <vscale x 4 x i16> part to
11142       // <vscale x 8 x i8> first, then extract the <vscale x 1 x i8> as a
11143       // subvector.
11144       if (ValueEltVT != PartEltVT) {
11145         unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
11146         assert(Count != 0 && "The number of elements should not be zero.");
11147         SameEltTypeVT =
11148             EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
11149         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
11150       }
11151       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
11152                         DAG.getVectorIdxConstant(0, DL));
11153       return Val;
11154     }
11155   }
11156   return SDValue();
11157 }
11158 
11159 SDValue
11160 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11161                                    SelectionDAG &DAG,
11162                                    SmallVectorImpl<SDNode *> &Created) const {
11163   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11164   if (isIntDivCheap(N->getValueType(0), Attr))
11165     return SDValue(N, 0); // Lower SDIV as SDIV
11166 
11167   assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
11168          "Unexpected divisor!");
11169 
11170   // Conditional move is needed, so do the transformation iff Zbt is enabled.
11171   if (!Subtarget.hasStdExtZbt())
11172     return SDValue();
11173 
11174   // When |Divisor| >= 2^12, it isn't profitable to do such a transformation.
11175   // Dividing by 2 would also put more instructions on the critical path. So
11176   // we keep using the original DAGs for these cases.
11177   unsigned Lg2 = Divisor.countTrailingZeros();
11178   if (Lg2 == 1 || Lg2 >= 12)
11179     return SDValue();
11180 
11181   // fold (sdiv X, pow2)
11182   EVT VT = N->getValueType(0);
11183   if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
11184     return SDValue();
11185 
11186   SDLoc DL(N);
11187   SDValue N0 = N->getOperand(0);
11188   SDValue Zero = DAG.getConstant(0, DL, VT);
11189   SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
11190 
11191   // Add (N0 < 0) ? Pow2 - 1 : 0;
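  // e.g. for (sdiv x, 8) this computes (x < 0 ? x + 7 : x) >> 3, which rounds
  // the quotient toward zero; the SELECT is expected to lower to a Zbt
  // conditional move rather than a branch.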
11192   SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
11193   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
11194   SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
11195 
11196   Created.push_back(Cmp.getNode());
11197   Created.push_back(Add.getNode());
11198   Created.push_back(Sel.getNode());
11199 
11200   // Divide by pow2.
11201   SDValue SRA =
11202       DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
11203 
11204   // If we're dividing by a positive value, we're done.  Otherwise, we must
11205   // negate the result.
11206   if (Divisor.isNonNegative())
11207     return SRA;
11208 
11209   Created.push_back(SRA.getNode());
11210   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
11211 }
11212 
11213 #define GET_REGISTER_MATCHER
11214 #include "RISCVGenAsmMatcher.inc"
11215 
11216 Register
11217 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
11218                                        const MachineFunction &MF) const {
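  // Resolve a register name for named-register accesses such as
  // llvm.read_register / llvm.write_register. Only registers that are
  // reserved, either always or by the user (e.g. via -ffixed-<reg>), may be
  // requested; anything else is a fatal error.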
11219   Register Reg = MatchRegisterAltName(RegName);
11220   if (Reg == RISCV::NoRegister)
11221     Reg = MatchRegisterName(RegName);
11222   if (Reg == RISCV::NoRegister)
11223     report_fatal_error(
11224         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
11225   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
11226   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
11227     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
11228                              StringRef(RegName) + "\"."));
11229   return Reg;
11230 }
11231 
11232 namespace llvm {
11233 namespace RISCVVIntrinsicsTable {
11234 
11235 #define GET_RISCVVIntrinsicsTable_IMPL
11236 #include "RISCVGenSearchableTables.inc"
11237 
11238 } // namespace RISCVVIntrinsicsTable
11239 
11240 } // namespace llvm
11241