1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/DiagnosticPrinter.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/KnownBits.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/raw_ostream.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49                                          const RISCVSubtarget &STI)
50     : TargetLowering(TM), Subtarget(STI) {
51 
52   if (Subtarget.isRV32E())
53     report_fatal_error("Codegen not yet implemented for RV32E");
54 
55   RISCVABI::ABI ABI = Subtarget.getTargetABI();
56   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59       !Subtarget.hasStdExtF()) {
60     errs() << "Hard-float 'f' ABI can't be used for a target that "
61                 "doesn't support the F instruction set extension (ignoring "
62                           "target-abi)\n";
63     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
64   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65              !Subtarget.hasStdExtD()) {
66     errs() << "Hard-float 'd' ABI can't be used for a target that "
67               "doesn't support the D instruction set extension (ignoring "
68               "target-abi)\n";
69     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
70   }
71 
72   switch (ABI) {
73   default:
74     report_fatal_error("Don't know how to lower this ABI");
75   case RISCVABI::ABI_ILP32:
76   case RISCVABI::ABI_ILP32F:
77   case RISCVABI::ABI_ILP32D:
78   case RISCVABI::ABI_LP64:
79   case RISCVABI::ABI_LP64F:
80   case RISCVABI::ABI_LP64D:
81     break;
82   }
83 
84   MVT XLenVT = Subtarget.getXLenVT();
85 
86   // Set up the register classes.
87   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89   if (Subtarget.hasStdExtZfh())
90     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91   if (Subtarget.hasStdExtF())
92     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93   if (Subtarget.hasStdExtD())
94     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96   static const MVT::SimpleValueType BoolVecVTs[] = {
97       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
98       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99   static const MVT::SimpleValueType IntVecVTs[] = {
100       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
101       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
102       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104       MVT::nxv4i64, MVT::nxv8i64};
105   static const MVT::SimpleValueType F16VecVTs[] = {
106       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
107       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108   static const MVT::SimpleValueType F32VecVTs[] = {
109       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110   static const MVT::SimpleValueType F64VecVTs[] = {
111       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113   if (Subtarget.hasVInstructions()) {
114     auto addRegClassForRVV = [this](MVT VT) {
115       unsigned Size = VT.getSizeInBits().getKnownMinValue();
116       assert(Size <= 512 && isPowerOf2_32(Size));
117       const TargetRegisterClass *RC;
118       if (Size <= 64)
119         RC = &RISCV::VRRegClass;
120       else if (Size == 128)
121         RC = &RISCV::VRM2RegClass;
122       else if (Size == 256)
123         RC = &RISCV::VRM4RegClass;
124       else
125         RC = &RISCV::VRM8RegClass;
126 
127       addRegisterClass(VT, RC);
128     };
129 
130     for (MVT VT : BoolVecVTs)
131       addRegClassForRVV(VT);
132     for (MVT VT : IntVecVTs) {
133       if (VT.getVectorElementType() == MVT::i64 &&
134           !Subtarget.hasVInstructionsI64())
135         continue;
136       addRegClassForRVV(VT);
137     }
138 
139     if (Subtarget.hasVInstructionsF16())
140       for (MVT VT : F16VecVTs)
141         addRegClassForRVV(VT);
142 
143     if (Subtarget.hasVInstructionsF32())
144       for (MVT VT : F32VecVTs)
145         addRegClassForRVV(VT);
146 
147     if (Subtarget.hasVInstructionsF64())
148       for (MVT VT : F64VecVTs)
149         addRegClassForRVV(VT);
150 
151     if (Subtarget.useRVVForFixedLengthVectors()) {
152       auto addRegClassForFixedVectors = [this](MVT VT) {
153         MVT ContainerVT = getContainerForFixedLengthVector(VT);
154         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156         addRegisterClass(VT, TRI.getRegClass(RCID));
157       };
158       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159         if (useRVVForFixedLengthVectorVT(VT))
160           addRegClassForFixedVectors(VT);
161 
162       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163         if (useRVVForFixedLengthVectorVT(VT))
164           addRegClassForFixedVectors(VT);
165     }
166   }
167 
168   // Compute derived properties from the register classes.
169   computeRegisterProperties(STI.getRegisterInfo());
170 
171   setStackPointerRegisterToSaveRestore(RISCV::X2);
172 
173   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
174     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
175 
176   // TODO: add all necessary setOperationAction calls.
177   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
178 
179   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
180   setOperationAction(ISD::BR_CC, XLenVT, Expand);
181   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
182   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
183 
184   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
185   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
186 
187   setOperationAction(ISD::VASTART, MVT::Other, Custom);
188   setOperationAction(ISD::VAARG, MVT::Other, Expand);
189   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
190   setOperationAction(ISD::VAEND, MVT::Other, Expand);
191 
192   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
193   if (!Subtarget.hasStdExtZbb()) {
194     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
195     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
196   }
197 
198   if (Subtarget.is64Bit()) {
199     setOperationAction(ISD::ADD, MVT::i32, Custom);
200     setOperationAction(ISD::SUB, MVT::i32, Custom);
201     setOperationAction(ISD::SHL, MVT::i32, Custom);
202     setOperationAction(ISD::SRA, MVT::i32, Custom);
203     setOperationAction(ISD::SRL, MVT::i32, Custom);
204 
205     setOperationAction(ISD::UADDO, MVT::i32, Custom);
206     setOperationAction(ISD::USUBO, MVT::i32, Custom);
207     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
208     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
209   } else {
210     setLibcallName(RTLIB::SHL_I128, nullptr);
211     setLibcallName(RTLIB::SRL_I128, nullptr);
212     setLibcallName(RTLIB::SRA_I128, nullptr);
213     setLibcallName(RTLIB::MUL_I128, nullptr);
214     setLibcallName(RTLIB::MULO_I64, nullptr);
215   }
216 
217   if (!Subtarget.hasStdExtM()) {
218     setOperationAction(ISD::MUL, XLenVT, Expand);
219     setOperationAction(ISD::MULHS, XLenVT, Expand);
220     setOperationAction(ISD::MULHU, XLenVT, Expand);
221     setOperationAction(ISD::SDIV, XLenVT, Expand);
222     setOperationAction(ISD::UDIV, XLenVT, Expand);
223     setOperationAction(ISD::SREM, XLenVT, Expand);
224     setOperationAction(ISD::UREM, XLenVT, Expand);
225   } else {
226     if (Subtarget.is64Bit()) {
227       setOperationAction(ISD::MUL, MVT::i32, Custom);
228       setOperationAction(ISD::MUL, MVT::i128, Custom);
229 
230       setOperationAction(ISD::SDIV, MVT::i8, Custom);
231       setOperationAction(ISD::UDIV, MVT::i8, Custom);
232       setOperationAction(ISD::UREM, MVT::i8, Custom);
233       setOperationAction(ISD::SDIV, MVT::i16, Custom);
234       setOperationAction(ISD::UDIV, MVT::i16, Custom);
235       setOperationAction(ISD::UREM, MVT::i16, Custom);
236       setOperationAction(ISD::SDIV, MVT::i32, Custom);
237       setOperationAction(ISD::UDIV, MVT::i32, Custom);
238       setOperationAction(ISD::UREM, MVT::i32, Custom);
239     } else {
240       setOperationAction(ISD::MUL, MVT::i64, Custom);
241     }
242   }
243 
244   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
245   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
246   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
247   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
248 
249   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
250   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
251   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
252 
253   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
254     if (Subtarget.is64Bit()) {
255       setOperationAction(ISD::ROTL, MVT::i32, Custom);
256       setOperationAction(ISD::ROTR, MVT::i32, Custom);
257     }
258   } else {
259     setOperationAction(ISD::ROTL, XLenVT, Expand);
260     setOperationAction(ISD::ROTR, XLenVT, Expand);
261   }
262 
263   if (Subtarget.hasStdExtZbp()) {
264     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
265     // more combining.
266     setOperationAction(ISD::BITREVERSE, XLenVT,   Custom);
267     setOperationAction(ISD::BSWAP,      XLenVT,   Custom);
268     setOperationAction(ISD::BITREVERSE, MVT::i8,  Custom);
269     // BSWAP i8 doesn't exist.
270     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
271     setOperationAction(ISD::BSWAP,      MVT::i16, Custom);
272 
273     if (Subtarget.is64Bit()) {
274       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
275       setOperationAction(ISD::BSWAP,      MVT::i32, Custom);
276     }
277   } else {
278     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
279     // pattern match it directly in isel.
280     setOperationAction(ISD::BSWAP, XLenVT,
281                        Subtarget.hasStdExtZbb() ? Legal : Expand);
282   }
283 
284   if (Subtarget.hasStdExtZbb()) {
285     setOperationAction(ISD::SMIN, XLenVT, Legal);
286     setOperationAction(ISD::SMAX, XLenVT, Legal);
287     setOperationAction(ISD::UMIN, XLenVT, Legal);
288     setOperationAction(ISD::UMAX, XLenVT, Legal);
289 
290     if (Subtarget.is64Bit()) {
291       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
292       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
293       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
294       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
295     }
296   } else {
297     setOperationAction(ISD::CTTZ, XLenVT, Expand);
298     setOperationAction(ISD::CTLZ, XLenVT, Expand);
299     setOperationAction(ISD::CTPOP, XLenVT, Expand);
300   }
301 
302   if (Subtarget.hasStdExtZbt()) {
303     setOperationAction(ISD::FSHL, XLenVT, Custom);
304     setOperationAction(ISD::FSHR, XLenVT, Custom);
305     setOperationAction(ISD::SELECT, XLenVT, Legal);
306 
307     if (Subtarget.is64Bit()) {
308       setOperationAction(ISD::FSHL, MVT::i32, Custom);
309       setOperationAction(ISD::FSHR, MVT::i32, Custom);
310     }
311   } else {
312     setOperationAction(ISD::SELECT, XLenVT, Custom);
313   }
314 
315   static const ISD::CondCode FPCCToExpand[] = {
316       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
317       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
318       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
319 
320   static const ISD::NodeType FPOpToExpand[] = {
321       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
322       ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
323 
324   if (Subtarget.hasStdExtZfh())
325     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
326 
327   if (Subtarget.hasStdExtZfh()) {
328     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
329     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
330     setOperationAction(ISD::LRINT, MVT::f16, Legal);
331     setOperationAction(ISD::LLRINT, MVT::f16, Legal);
332     setOperationAction(ISD::LROUND, MVT::f16, Legal);
333     setOperationAction(ISD::LLROUND, MVT::f16, Legal);
334     setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
335     setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
336     setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
337     setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
338     setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
339     setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
340     setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
341     setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
342     setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
343     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
344     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
345     setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
346     setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
347     setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
348     for (auto CC : FPCCToExpand)
349       setCondCodeAction(CC, MVT::f16, Expand);
350     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
351     setOperationAction(ISD::SELECT, MVT::f16, Custom);
352     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
353 
354     setOperationAction(ISD::FREM,       MVT::f16, Promote);
355     setOperationAction(ISD::FCEIL,      MVT::f16, Promote);
356     setOperationAction(ISD::FFLOOR,     MVT::f16, Promote);
357     setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
358     setOperationAction(ISD::FRINT,      MVT::f16, Promote);
359     setOperationAction(ISD::FROUND,     MVT::f16, Promote);
360     setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
361     setOperationAction(ISD::FTRUNC,     MVT::f16, Promote);
362     setOperationAction(ISD::FPOW,       MVT::f16, Promote);
363     setOperationAction(ISD::FPOWI,      MVT::f16, Promote);
364     setOperationAction(ISD::FCOS,       MVT::f16, Promote);
365     setOperationAction(ISD::FSIN,       MVT::f16, Promote);
366     setOperationAction(ISD::FSINCOS,    MVT::f16, Promote);
367     setOperationAction(ISD::FEXP,       MVT::f16, Promote);
368     setOperationAction(ISD::FEXP2,      MVT::f16, Promote);
369     setOperationAction(ISD::FLOG,       MVT::f16, Promote);
370     setOperationAction(ISD::FLOG2,      MVT::f16, Promote);
371     setOperationAction(ISD::FLOG10,     MVT::f16, Promote);
372 
373     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
374     // complete support for all operations in LegalizeDAG.
375 
376     // We need to custom promote this.
377     if (Subtarget.is64Bit())
378       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
379   }
380 
381   if (Subtarget.hasStdExtF()) {
382     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
383     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
384     setOperationAction(ISD::LRINT, MVT::f32, Legal);
385     setOperationAction(ISD::LLRINT, MVT::f32, Legal);
386     setOperationAction(ISD::LROUND, MVT::f32, Legal);
387     setOperationAction(ISD::LLROUND, MVT::f32, Legal);
388     setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
389     setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
390     setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
391     setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
392     setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
393     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
394     setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
395     setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
396     setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
397     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
398     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
399     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
400     for (auto CC : FPCCToExpand)
401       setCondCodeAction(CC, MVT::f32, Expand);
402     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
403     setOperationAction(ISD::SELECT, MVT::f32, Custom);
404     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
405     for (auto Op : FPOpToExpand)
406       setOperationAction(Op, MVT::f32, Expand);
407     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
408     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
409   }
410 
411   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
412     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
413 
414   if (Subtarget.hasStdExtD()) {
415     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
416     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
417     setOperationAction(ISD::LRINT, MVT::f64, Legal);
418     setOperationAction(ISD::LLRINT, MVT::f64, Legal);
419     setOperationAction(ISD::LROUND, MVT::f64, Legal);
420     setOperationAction(ISD::LLROUND, MVT::f64, Legal);
421     setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
422     setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
423     setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
424     setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
425     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
426     setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
427     setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
428     setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
429     setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
430     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
431     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
432     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
433     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
434     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
435     for (auto CC : FPCCToExpand)
436       setCondCodeAction(CC, MVT::f64, Expand);
437     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
438     setOperationAction(ISD::SELECT, MVT::f64, Custom);
439     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
440     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
441     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
442     for (auto Op : FPOpToExpand)
443       setOperationAction(Op, MVT::f64, Expand);
444     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
445     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
446   }
447 
448   if (Subtarget.is64Bit()) {
449     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
450     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
452     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
453   }
454 
455   if (Subtarget.hasStdExtF()) {
456     setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
457     setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
458 
459     setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
460     setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
461     setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
462     setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
463 
464     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
465     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
466   }
467 
468   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
469   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
470   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
471   setOperationAction(ISD::JumpTable, XLenVT, Custom);
472 
473   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
474 
475   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
476   // Unfortunately this can't be determined just from the ISA naming string.
477   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
478                      Subtarget.is64Bit() ? Legal : Custom);
479 
480   setOperationAction(ISD::TRAP, MVT::Other, Legal);
481   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
482   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
483   if (Subtarget.is64Bit())
484     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
485 
486   if (Subtarget.hasStdExtA()) {
487     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
488     setMinCmpXchgSizeInBits(32);
489   } else {
490     setMaxAtomicSizeInBitsSupported(0);
491   }
492 
493   setBooleanContents(ZeroOrOneBooleanContent);
494 
495   if (Subtarget.hasVInstructions()) {
496     setBooleanVectorContents(ZeroOrOneBooleanContent);
497 
498     setOperationAction(ISD::VSCALE, XLenVT, Custom);
499 
500     // RVV intrinsics may have illegal operands.
501     // We also need to custom legalize vmv.x.s.
502     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
503     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
504     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
505     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
506     if (Subtarget.is64Bit()) {
507       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
508     } else {
509       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
510       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
511     }
512 
513     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
514     setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
515 
516     static const unsigned IntegerVPOps[] = {
517         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
518         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
519         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
520         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
521         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
522         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
523         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
524         ISD::VP_SELECT};
525 
526     static const unsigned FloatingPointVPOps[] = {
527         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
528         ISD::VP_FDIV,        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
529         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT};
530 
531     if (!Subtarget.is64Bit()) {
532       // We must custom-lower certain vXi64 operations on RV32 due to the vector
533       // element type being illegal.
534       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
535       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
536 
537       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
538       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
539       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
540       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
541       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
542       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
543       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
544       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
545 
546       setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
547       setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
548       setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
549       setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
550       setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
551       setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
552       setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
553       setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
554     }
555 
556     for (MVT VT : BoolVecVTs) {
557       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
558 
559       // Mask VTs are custom-expanded into a series of standard nodes
560       setOperationAction(ISD::TRUNCATE, VT, Custom);
561       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
562       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
563       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
564 
565       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
566       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
567 
568       setOperationAction(ISD::SELECT, VT, Custom);
569       setOperationAction(ISD::SELECT_CC, VT, Expand);
570       setOperationAction(ISD::VSELECT, VT, Expand);
571       setOperationAction(ISD::VP_SELECT, VT, Expand);
572 
573       setOperationAction(ISD::VP_AND, VT, Custom);
574       setOperationAction(ISD::VP_OR, VT, Custom);
575       setOperationAction(ISD::VP_XOR, VT, Custom);
576 
577       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
578       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
579       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
580 
581       setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
582       setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
583       setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
584 
585       // RVV has native int->float & float->int conversions where the
586       // element type sizes are within one power-of-two of each other. Any
587       // wider distances between type sizes have to be lowered as sequences
588       // which progressively narrow the gap in stages.
589       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
590       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
591       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
592       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
593 
594       // Expand all extending loads to types larger than this, and truncating
595       // stores from types larger than this.
596       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
597         setTruncStoreAction(OtherVT, VT, Expand);
598         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
599         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
600         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
601       }
602     }
603 
604     for (MVT VT : IntVecVTs) {
605       if (VT.getVectorElementType() == MVT::i64 &&
606           !Subtarget.hasVInstructionsI64())
607         continue;
608 
609       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
610       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
611 
612       // Vectors implement MULHS/MULHU.
613       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
614       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
615 
616       setOperationAction(ISD::SMIN, VT, Legal);
617       setOperationAction(ISD::SMAX, VT, Legal);
618       setOperationAction(ISD::UMIN, VT, Legal);
619       setOperationAction(ISD::UMAX, VT, Legal);
620 
621       setOperationAction(ISD::ROTL, VT, Expand);
622       setOperationAction(ISD::ROTR, VT, Expand);
623 
624       setOperationAction(ISD::CTTZ, VT, Expand);
625       setOperationAction(ISD::CTLZ, VT, Expand);
626       setOperationAction(ISD::CTPOP, VT, Expand);
627 
628       setOperationAction(ISD::BSWAP, VT, Expand);
629 
630       // Custom-lower extensions and truncations from/to mask types.
631       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
632       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
633       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
634 
635       // RVV has native int->float & float->int conversions where the
636       // element type sizes are within one power-of-two of each other. Any
637       // wider distances between type sizes have to be lowered as sequences
638       // which progressively narrow the gap in stages.
639       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
640       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
641       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
642       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
643 
644       setOperationAction(ISD::SADDSAT, VT, Legal);
645       setOperationAction(ISD::UADDSAT, VT, Legal);
646       setOperationAction(ISD::SSUBSAT, VT, Legal);
647       setOperationAction(ISD::USUBSAT, VT, Legal);
648 
649       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
650       // nodes which truncate by one power of two at a time.
651       setOperationAction(ISD::TRUNCATE, VT, Custom);
652 
653       // Custom-lower insert/extract operations to simplify patterns.
654       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
655       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
656 
657       // Custom-lower reduction operations to set up the corresponding custom
658       // nodes' operands.
659       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
660       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
661       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
662       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
663       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
664       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
665       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
666       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
667 
668       for (unsigned VPOpc : IntegerVPOps)
669         setOperationAction(VPOpc, VT, Custom);
670 
671       setOperationAction(ISD::LOAD, VT, Custom);
672       setOperationAction(ISD::STORE, VT, Custom);
673 
674       setOperationAction(ISD::MLOAD, VT, Custom);
675       setOperationAction(ISD::MSTORE, VT, Custom);
676       setOperationAction(ISD::MGATHER, VT, Custom);
677       setOperationAction(ISD::MSCATTER, VT, Custom);
678 
679       setOperationAction(ISD::VP_LOAD, VT, Custom);
680       setOperationAction(ISD::VP_STORE, VT, Custom);
681       setOperationAction(ISD::VP_GATHER, VT, Custom);
682       setOperationAction(ISD::VP_SCATTER, VT, Custom);
683 
684       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
685       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
686       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
687 
688       setOperationAction(ISD::SELECT, VT, Custom);
689       setOperationAction(ISD::SELECT_CC, VT, Expand);
690 
691       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
692       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
693 
694       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
695         setTruncStoreAction(VT, OtherVT, Expand);
696         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
697         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
698         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
699       }
700 
701       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
702       // type that can represent the value exactly.
703       if (VT.getVectorElementType() != MVT::i64) {
704         MVT FloatEltVT =
705             VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
706         EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
707         if (isTypeLegal(FloatVT)) {
708           setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
709           setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
710         }
711       }
712     }
713 
714     // Expand various CCs to best match the RVV ISA, which natively supports UNE
715     // but no other unordered comparisons, and supports all ordered comparisons
716     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
717     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
718     // and we pattern-match those back to the "original", swapping operands once
719     // more. This way we catch both operations and both "vf" and "fv" forms with
720     // fewer patterns.
721     static const ISD::CondCode VFPCCToExpand[] = {
722         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
723         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
724         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
725     };
726 
727     // Sets common operation actions on RVV floating-point vector types.
728     const auto SetCommonVFPActions = [&](MVT VT) {
729       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
730       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
731       // sizes are within one power-of-two of each other. Therefore conversions
732       // between vXf16 and vXf64 must be lowered as sequences which convert via
733       // vXf32.
734       setOperationAction(ISD::FP_ROUND, VT, Custom);
735       setOperationAction(ISD::FP_EXTEND, VT, Custom);
736       // Custom-lower insert/extract operations to simplify patterns.
737       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
738       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
739       // Expand various condition codes (explained above).
740       for (auto CC : VFPCCToExpand)
741         setCondCodeAction(CC, VT, Expand);
742 
743       setOperationAction(ISD::FMINNUM, VT, Legal);
744       setOperationAction(ISD::FMAXNUM, VT, Legal);
745 
746       setOperationAction(ISD::FTRUNC, VT, Custom);
747       setOperationAction(ISD::FCEIL, VT, Custom);
748       setOperationAction(ISD::FFLOOR, VT, Custom);
749 
750       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
751       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
752       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
753       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
754 
755       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
756 
757       setOperationAction(ISD::LOAD, VT, Custom);
758       setOperationAction(ISD::STORE, VT, Custom);
759 
760       setOperationAction(ISD::MLOAD, VT, Custom);
761       setOperationAction(ISD::MSTORE, VT, Custom);
762       setOperationAction(ISD::MGATHER, VT, Custom);
763       setOperationAction(ISD::MSCATTER, VT, Custom);
764 
765       setOperationAction(ISD::VP_LOAD, VT, Custom);
766       setOperationAction(ISD::VP_STORE, VT, Custom);
767       setOperationAction(ISD::VP_GATHER, VT, Custom);
768       setOperationAction(ISD::VP_SCATTER, VT, Custom);
769 
770       setOperationAction(ISD::SELECT, VT, Custom);
771       setOperationAction(ISD::SELECT_CC, VT, Expand);
772 
773       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
774       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
775       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
776 
777       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
778 
779       for (unsigned VPOpc : FloatingPointVPOps)
780         setOperationAction(VPOpc, VT, Custom);
781     };
782 
783     // Sets common extload/truncstore actions on RVV floating-point vector
784     // types.
785     const auto SetCommonVFPExtLoadTruncStoreActions =
786         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
787           for (auto SmallVT : SmallerVTs) {
788             setTruncStoreAction(VT, SmallVT, Expand);
789             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
790           }
791         };
792 
793     if (Subtarget.hasVInstructionsF16())
794       for (MVT VT : F16VecVTs)
795         SetCommonVFPActions(VT);
796 
797     for (MVT VT : F32VecVTs) {
798       if (Subtarget.hasVInstructionsF32())
799         SetCommonVFPActions(VT);
800       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
801     }
802 
803     for (MVT VT : F64VecVTs) {
804       if (Subtarget.hasVInstructionsF64())
805         SetCommonVFPActions(VT);
806       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
807       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
808     }
809 
810     if (Subtarget.useRVVForFixedLengthVectors()) {
811       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
812         if (!useRVVForFixedLengthVectorVT(VT))
813           continue;
814 
815         // By default everything must be expanded.
816         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
817           setOperationAction(Op, VT, Expand);
818         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
819           setTruncStoreAction(VT, OtherVT, Expand);
820           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
821           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
822           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
823         }
824 
825         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
826         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
827         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
828 
829         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
830         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
831 
832         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
833         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
834 
835         setOperationAction(ISD::LOAD, VT, Custom);
836         setOperationAction(ISD::STORE, VT, Custom);
837 
838         setOperationAction(ISD::SETCC, VT, Custom);
839 
840         setOperationAction(ISD::SELECT, VT, Custom);
841 
842         setOperationAction(ISD::TRUNCATE, VT, Custom);
843 
844         setOperationAction(ISD::BITCAST, VT, Custom);
845 
846         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
847         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
848         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
849 
850         setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
851         setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
852         setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
853 
854         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
855         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
856         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
857         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
858 
859         // Operations below are different for between masks and other vectors.
860         if (VT.getVectorElementType() == MVT::i1) {
861           setOperationAction(ISD::VP_AND, VT, Custom);
862           setOperationAction(ISD::VP_OR, VT, Custom);
863           setOperationAction(ISD::VP_XOR, VT, Custom);
864           setOperationAction(ISD::AND, VT, Custom);
865           setOperationAction(ISD::OR, VT, Custom);
866           setOperationAction(ISD::XOR, VT, Custom);
867           continue;
868         }
869 
870         // Use SPLAT_VECTOR to prevent type legalization from destroying the
871         // splats when type legalizing i64 scalar on RV32.
872         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
873         // improvements first.
874         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
875           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
876           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
877         }
878 
879         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
880         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
881 
882         setOperationAction(ISD::MLOAD, VT, Custom);
883         setOperationAction(ISD::MSTORE, VT, Custom);
884         setOperationAction(ISD::MGATHER, VT, Custom);
885         setOperationAction(ISD::MSCATTER, VT, Custom);
886 
887         setOperationAction(ISD::VP_LOAD, VT, Custom);
888         setOperationAction(ISD::VP_STORE, VT, Custom);
889         setOperationAction(ISD::VP_GATHER, VT, Custom);
890         setOperationAction(ISD::VP_SCATTER, VT, Custom);
891 
892         setOperationAction(ISD::ADD, VT, Custom);
893         setOperationAction(ISD::MUL, VT, Custom);
894         setOperationAction(ISD::SUB, VT, Custom);
895         setOperationAction(ISD::AND, VT, Custom);
896         setOperationAction(ISD::OR, VT, Custom);
897         setOperationAction(ISD::XOR, VT, Custom);
898         setOperationAction(ISD::SDIV, VT, Custom);
899         setOperationAction(ISD::SREM, VT, Custom);
900         setOperationAction(ISD::UDIV, VT, Custom);
901         setOperationAction(ISD::UREM, VT, Custom);
902         setOperationAction(ISD::SHL, VT, Custom);
903         setOperationAction(ISD::SRA, VT, Custom);
904         setOperationAction(ISD::SRL, VT, Custom);
905 
906         setOperationAction(ISD::SMIN, VT, Custom);
907         setOperationAction(ISD::SMAX, VT, Custom);
908         setOperationAction(ISD::UMIN, VT, Custom);
909         setOperationAction(ISD::UMAX, VT, Custom);
910         setOperationAction(ISD::ABS,  VT, Custom);
911 
912         setOperationAction(ISD::MULHS, VT, Custom);
913         setOperationAction(ISD::MULHU, VT, Custom);
914 
915         setOperationAction(ISD::SADDSAT, VT, Custom);
916         setOperationAction(ISD::UADDSAT, VT, Custom);
917         setOperationAction(ISD::SSUBSAT, VT, Custom);
918         setOperationAction(ISD::USUBSAT, VT, Custom);
919 
920         setOperationAction(ISD::VSELECT, VT, Custom);
921         setOperationAction(ISD::SELECT_CC, VT, Expand);
922 
923         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
924         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
925         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
926 
927         // Custom-lower reduction operations to set up the corresponding custom
928         // nodes' operands.
929         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
930         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
931         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
932         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
933         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
934 
935         for (unsigned VPOpc : IntegerVPOps)
936           setOperationAction(VPOpc, VT, Custom);
937 
938         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
939         // type that can represent the value exactly.
940         if (VT.getVectorElementType() != MVT::i64) {
941           MVT FloatEltVT =
942               VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
943           EVT FloatVT =
944               MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
945           if (isTypeLegal(FloatVT)) {
946             setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
947             setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
948           }
949         }
950       }
951 
952       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
953         if (!useRVVForFixedLengthVectorVT(VT))
954           continue;
955 
956         // By default everything must be expanded.
957         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
958           setOperationAction(Op, VT, Expand);
959         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
960           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
961           setTruncStoreAction(VT, OtherVT, Expand);
962         }
963 
964         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
965         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
966         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
967 
968         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
969         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
970         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
971         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
972         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
973 
974         setOperationAction(ISD::LOAD, VT, Custom);
975         setOperationAction(ISD::STORE, VT, Custom);
976         setOperationAction(ISD::MLOAD, VT, Custom);
977         setOperationAction(ISD::MSTORE, VT, Custom);
978         setOperationAction(ISD::MGATHER, VT, Custom);
979         setOperationAction(ISD::MSCATTER, VT, Custom);
980 
981         setOperationAction(ISD::VP_LOAD, VT, Custom);
982         setOperationAction(ISD::VP_STORE, VT, Custom);
983         setOperationAction(ISD::VP_GATHER, VT, Custom);
984         setOperationAction(ISD::VP_SCATTER, VT, Custom);
985 
986         setOperationAction(ISD::FADD, VT, Custom);
987         setOperationAction(ISD::FSUB, VT, Custom);
988         setOperationAction(ISD::FMUL, VT, Custom);
989         setOperationAction(ISD::FDIV, VT, Custom);
990         setOperationAction(ISD::FNEG, VT, Custom);
991         setOperationAction(ISD::FABS, VT, Custom);
992         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
993         setOperationAction(ISD::FSQRT, VT, Custom);
994         setOperationAction(ISD::FMA, VT, Custom);
995         setOperationAction(ISD::FMINNUM, VT, Custom);
996         setOperationAction(ISD::FMAXNUM, VT, Custom);
997 
998         setOperationAction(ISD::FP_ROUND, VT, Custom);
999         setOperationAction(ISD::FP_EXTEND, VT, Custom);
1000 
1001         setOperationAction(ISD::FTRUNC, VT, Custom);
1002         setOperationAction(ISD::FCEIL, VT, Custom);
1003         setOperationAction(ISD::FFLOOR, VT, Custom);
1004 
1005         for (auto CC : VFPCCToExpand)
1006           setCondCodeAction(CC, VT, Expand);
1007 
1008         setOperationAction(ISD::VSELECT, VT, Custom);
1009         setOperationAction(ISD::SELECT, VT, Custom);
1010         setOperationAction(ISD::SELECT_CC, VT, Expand);
1011 
1012         setOperationAction(ISD::BITCAST, VT, Custom);
1013 
1014         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1015         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1016         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1017         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1018 
1019         for (unsigned VPOpc : FloatingPointVPOps)
1020           setOperationAction(VPOpc, VT, Custom);
1021       }
1022 
1023       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1024       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1025       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
1026       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
1027       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1028       setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1029       setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1030       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1031     }
1032   }
1033 
1034   // Function alignments.
1035   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
1036   setMinFunctionAlignment(FunctionAlignment);
1037   setPrefFunctionAlignment(FunctionAlignment);
1038 
1039   setMinimumJumpTableEntries(5);
1040 
1041   // Jumps are expensive, compared to logic
1042   setJumpIsExpensive();
1043 
1044   setTargetDAGCombine(ISD::ADD);
1045   setTargetDAGCombine(ISD::SUB);
1046   setTargetDAGCombine(ISD::AND);
1047   setTargetDAGCombine(ISD::OR);
1048   setTargetDAGCombine(ISD::XOR);
1049   setTargetDAGCombine(ISD::ANY_EXTEND);
1050   if (Subtarget.hasStdExtF()) {
1051     setTargetDAGCombine(ISD::ZERO_EXTEND);
1052     setTargetDAGCombine(ISD::FP_TO_SINT);
1053     setTargetDAGCombine(ISD::FP_TO_UINT);
1054     setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
1055     setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
1056   }
1057   if (Subtarget.hasVInstructions()) {
1058     setTargetDAGCombine(ISD::FCOPYSIGN);
1059     setTargetDAGCombine(ISD::MGATHER);
1060     setTargetDAGCombine(ISD::MSCATTER);
1061     setTargetDAGCombine(ISD::VP_GATHER);
1062     setTargetDAGCombine(ISD::VP_SCATTER);
1063     setTargetDAGCombine(ISD::SRA);
1064     setTargetDAGCombine(ISD::SRL);
1065     setTargetDAGCombine(ISD::SHL);
1066     setTargetDAGCombine(ISD::STORE);
1067   }
1068 }
1069 
1070 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1071                                             LLVMContext &Context,
1072                                             EVT VT) const {
1073   if (!VT.isVector())
1074     return getPointerTy(DL);
1075   if (Subtarget.hasVInstructions() &&
1076       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1077     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1078   return VT.changeVectorElementTypeToInteger();
1079 }
1080 
1081 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1082   return Subtarget.getXLenVT();
1083 }
1084 
1085 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1086                                              const CallInst &I,
1087                                              MachineFunction &MF,
1088                                              unsigned Intrinsic) const {
1089   auto &DL = I.getModule()->getDataLayout();
1090   switch (Intrinsic) {
1091   default:
1092     return false;
1093   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1094   case Intrinsic::riscv_masked_atomicrmw_add_i32:
1095   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1096   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1097   case Intrinsic::riscv_masked_atomicrmw_max_i32:
1098   case Intrinsic::riscv_masked_atomicrmw_min_i32:
1099   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1100   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1101   case Intrinsic::riscv_masked_cmpxchg_i32: {
1102     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
1103     Info.opc = ISD::INTRINSIC_W_CHAIN;
1104     Info.memVT = MVT::getVT(PtrTy->getElementType());
1105     Info.ptrVal = I.getArgOperand(0);
1106     Info.offset = 0;
1107     Info.align = Align(4);
1108     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1109                  MachineMemOperand::MOVolatile;
1110     return true;
1111   }
1112   case Intrinsic::riscv_masked_strided_load:
1113     Info.opc = ISD::INTRINSIC_W_CHAIN;
1114     Info.ptrVal = I.getArgOperand(1);
1115     Info.memVT = getValueType(DL, I.getType()->getScalarType());
1116     Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1117     Info.size = MemoryLocation::UnknownSize;
1118     Info.flags |= MachineMemOperand::MOLoad;
1119     return true;
1120   case Intrinsic::riscv_masked_strided_store:
1121     Info.opc = ISD::INTRINSIC_VOID;
1122     Info.ptrVal = I.getArgOperand(1);
1123     Info.memVT =
1124         getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1125     Info.align = Align(
1126         DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1127         8);
1128     Info.size = MemoryLocation::UnknownSize;
1129     Info.flags |= MachineMemOperand::MOStore;
1130     return true;
1131   }
1132 }
1133 
1134 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1135                                                 const AddrMode &AM, Type *Ty,
1136                                                 unsigned AS,
1137                                                 Instruction *I) const {
1138   // No global is ever allowed as a base.
1139   if (AM.BaseGV)
1140     return false;
1141 
1142   // Require a 12-bit signed offset.
1143   if (!isInt<12>(AM.BaseOffs))
1144     return false;
1145 
1146   switch (AM.Scale) {
1147   case 0: // "r+i" or just "i", depending on HasBaseReg.
1148     break;
1149   case 1:
1150     if (!AM.HasBaseReg) // allow "r+i".
1151       break;
1152     return false; // disallow "r+r" or "r+r+i".
1153   default:
1154     return false;
1155   }
1156 
1157   return true;
1158 }
1159 
1160 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1161   return isInt<12>(Imm);
1162 }
1163 
1164 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1165   return isInt<12>(Imm);
1166 }
1167 
1168 // On RV32, 64-bit integers are split into their high and low parts and held
1169 // in two different registers, so the trunc is free since the low register can
1170 // just be used.
1171 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1172   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1173     return false;
1174   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1175   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1176   return (SrcBits == 64 && DestBits == 32);
1177 }
1178 
1179 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1180   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1181       !SrcVT.isInteger() || !DstVT.isInteger())
1182     return false;
1183   unsigned SrcBits = SrcVT.getSizeInBits();
1184   unsigned DestBits = DstVT.getSizeInBits();
1185   return (SrcBits == 64 && DestBits == 32);
1186 }
1187 
1188 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1189   // Zexts are free if they can be combined with a load.
1190   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1191   // poorly with type legalization of compares preferring sext.
1192   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1193     EVT MemVT = LD->getMemoryVT();
1194     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1195         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1196          LD->getExtensionType() == ISD::ZEXTLOAD))
1197       return true;
1198   }
1199 
1200   return TargetLowering::isZExtFree(Val, VT2);
1201 }
1202 
1203 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1204   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1205 }
1206 
1207 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1208   return Subtarget.hasStdExtZbb();
1209 }
1210 
1211 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1212   return Subtarget.hasStdExtZbb();
1213 }
1214 
1215 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1216   EVT VT = Y.getValueType();
1217 
1218   // FIXME: Support vectors once we have tests.
1219   if (VT.isVector())
1220     return false;
1221 
1222   return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
1223 }
1224 
1225 /// Check if sinking \p I's operands to I's basic block is profitable, because
1226 /// the operands can be folded into a target instruction, e.g.
1227 /// splats of scalars can fold into vector instructions.
1228 bool RISCVTargetLowering::shouldSinkOperands(
1229     Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1230   using namespace llvm::PatternMatch;
1231 
1232   if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1233     return false;
1234 
1235   auto IsSinker = [&](Instruction *I, int Operand) {
1236     switch (I->getOpcode()) {
1237     case Instruction::Add:
1238     case Instruction::Sub:
1239     case Instruction::Mul:
1240     case Instruction::And:
1241     case Instruction::Or:
1242     case Instruction::Xor:
1243     case Instruction::FAdd:
1244     case Instruction::FSub:
1245     case Instruction::FMul:
1246     case Instruction::FDiv:
1247     case Instruction::ICmp:
1248     case Instruction::FCmp:
1249       return true;
1250     case Instruction::Shl:
1251     case Instruction::LShr:
1252     case Instruction::AShr:
1253     case Instruction::UDiv:
1254     case Instruction::SDiv:
1255     case Instruction::URem:
1256     case Instruction::SRem:
1257       return Operand == 1;
1258     case Instruction::Call:
1259       if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1260         switch (II->getIntrinsicID()) {
1261         case Intrinsic::fma:
1262           return Operand == 0 || Operand == 1;
1263         // FIXME: Our patterns can only match vx/vf instructions when the splat
1264         // it on the RHS, because TableGen doesn't recognize our VP operations
1265         // as commutative.
1266         case Intrinsic::vp_add:
1267         case Intrinsic::vp_mul:
1268         case Intrinsic::vp_and:
1269         case Intrinsic::vp_or:
1270         case Intrinsic::vp_xor:
1271         case Intrinsic::vp_fadd:
1272         case Intrinsic::vp_fsub:
1273         case Intrinsic::vp_fmul:
1274         case Intrinsic::vp_fdiv:
1275         case Intrinsic::vp_shl:
1276         case Intrinsic::vp_lshr:
1277         case Intrinsic::vp_ashr:
1278         case Intrinsic::vp_udiv:
1279         case Intrinsic::vp_sdiv:
1280         case Intrinsic::vp_urem:
1281         case Intrinsic::vp_srem:
1282           return Operand == 1;
1283         // ... the one exception is vp.sub which has explicit patterns for both
1284         // LHS and RHS (as vrsub).
1285         case Intrinsic::vp_sub:
1286           return Operand == 0 || Operand == 1;
1287         default:
1288           return false;
1289         }
1290       }
1291       return false;
1292     default:
1293       return false;
1294     }
1295   };
1296 
1297   for (auto OpIdx : enumerate(I->operands())) {
1298     if (!IsSinker(I, OpIdx.index()))
1299       continue;
1300 
1301     Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1302     // Make sure we are not already sinking this operand
1303     if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1304       continue;
1305 
1306     // We are looking for a splat that can be sunk.
1307     if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1308                              m_Undef(), m_ZeroMask())))
1309       continue;
1310 
1311     // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1312     // and vector registers
1313     for (Use &U : Op->uses()) {
1314       Instruction *Insn = cast<Instruction>(U.getUser());
1315       if (!IsSinker(Insn, U.getOperandNo()))
1316         return false;
1317     }
1318 
1319     Ops.push_back(&Op->getOperandUse(0));
1320     Ops.push_back(&OpIdx.value());
1321   }
1322   return true;
1323 }
1324 
1325 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1326                                        bool ForCodeSize) const {
1327   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1328   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1329     return false;
1330   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1331     return false;
1332   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1333     return false;
1334   return Imm.isZero();
1335 }
1336 
1337 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1338   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1339          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1340          (VT == MVT::f64 && Subtarget.hasStdExtD());
1341 }
1342 
1343 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1344                                                       CallingConv::ID CC,
1345                                                       EVT VT) const {
1346   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1347   // We might still end up using a GPR but that will be decided based on ABI.
1348   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1349   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1350     return MVT::f32;
1351 
1352   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1353 }
1354 
1355 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1356                                                            CallingConv::ID CC,
1357                                                            EVT VT) const {
1358   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1359   // We might still end up using a GPR but that will be decided based on ABI.
1360   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1361   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1362     return 1;
1363 
1364   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1365 }
1366 
1367 // Changes the condition code and swaps operands if necessary, so the SetCC
1368 // operation matches one of the comparisons supported directly by branches
1369 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1370 // with 1/-1.
1371 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1372                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1373   // Convert X > -1 to X >= 0.
1374   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1375     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1376     CC = ISD::SETGE;
1377     return;
1378   }
1379   // Convert X < 1 to 0 >= X.
1380   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1381     RHS = LHS;
1382     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1383     CC = ISD::SETGE;
1384     return;
1385   }
1386 
1387   switch (CC) {
1388   default:
1389     break;
1390   case ISD::SETGT:
1391   case ISD::SETLE:
1392   case ISD::SETUGT:
1393   case ISD::SETULE:
1394     CC = ISD::getSetCCSwappedOperands(CC);
1395     std::swap(LHS, RHS);
1396     break;
1397   }
1398 }
1399 
1400 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1401   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1402   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1403   if (VT.getVectorElementType() == MVT::i1)
1404     KnownSize *= 8;
1405 
1406   switch (KnownSize) {
1407   default:
1408     llvm_unreachable("Invalid LMUL.");
1409   case 8:
1410     return RISCVII::VLMUL::LMUL_F8;
1411   case 16:
1412     return RISCVII::VLMUL::LMUL_F4;
1413   case 32:
1414     return RISCVII::VLMUL::LMUL_F2;
1415   case 64:
1416     return RISCVII::VLMUL::LMUL_1;
1417   case 128:
1418     return RISCVII::VLMUL::LMUL_2;
1419   case 256:
1420     return RISCVII::VLMUL::LMUL_4;
1421   case 512:
1422     return RISCVII::VLMUL::LMUL_8;
1423   }
1424 }
1425 
1426 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1427   switch (LMul) {
1428   default:
1429     llvm_unreachable("Invalid LMUL.");
1430   case RISCVII::VLMUL::LMUL_F8:
1431   case RISCVII::VLMUL::LMUL_F4:
1432   case RISCVII::VLMUL::LMUL_F2:
1433   case RISCVII::VLMUL::LMUL_1:
1434     return RISCV::VRRegClassID;
1435   case RISCVII::VLMUL::LMUL_2:
1436     return RISCV::VRM2RegClassID;
1437   case RISCVII::VLMUL::LMUL_4:
1438     return RISCV::VRM4RegClassID;
1439   case RISCVII::VLMUL::LMUL_8:
1440     return RISCV::VRM8RegClassID;
1441   }
1442 }
1443 
1444 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1445   RISCVII::VLMUL LMUL = getLMUL(VT);
1446   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1447       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1448       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1449       LMUL == RISCVII::VLMUL::LMUL_1) {
1450     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1451                   "Unexpected subreg numbering");
1452     return RISCV::sub_vrm1_0 + Index;
1453   }
1454   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1455     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1456                   "Unexpected subreg numbering");
1457     return RISCV::sub_vrm2_0 + Index;
1458   }
1459   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1460     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1461                   "Unexpected subreg numbering");
1462     return RISCV::sub_vrm4_0 + Index;
1463   }
1464   llvm_unreachable("Invalid vector type.");
1465 }
1466 
1467 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1468   if (VT.getVectorElementType() == MVT::i1)
1469     return RISCV::VRRegClassID;
1470   return getRegClassIDForLMUL(getLMUL(VT));
1471 }
1472 
1473 // Attempt to decompose a subvector insert/extract between VecVT and
1474 // SubVecVT via subregister indices. Returns the subregister index that
1475 // can perform the subvector insert/extract with the given element index, as
1476 // well as the index corresponding to any leftover subvectors that must be
1477 // further inserted/extracted within the register class for SubVecVT.
1478 std::pair<unsigned, unsigned>
1479 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1480     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1481     const RISCVRegisterInfo *TRI) {
1482   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1483                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1484                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1485                 "Register classes not ordered");
1486   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1487   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1488   // Try to compose a subregister index that takes us from the incoming
1489   // LMUL>1 register class down to the outgoing one. At each step we half
1490   // the LMUL:
1491   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1492   // Note that this is not guaranteed to find a subregister index, such as
1493   // when we are extracting from one VR type to another.
1494   unsigned SubRegIdx = RISCV::NoSubRegister;
1495   for (const unsigned RCID :
1496        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1497     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1498       VecVT = VecVT.getHalfNumVectorElementsVT();
1499       bool IsHi =
1500           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1501       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1502                                             getSubregIndexByMVT(VecVT, IsHi));
1503       if (IsHi)
1504         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1505     }
1506   return {SubRegIdx, InsertExtractIdx};
1507 }
1508 
1509 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1510 // stores for those types.
1511 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1512   return !Subtarget.useRVVForFixedLengthVectors() ||
1513          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1514 }
1515 
1516 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1517   if (ScalarTy->isPointerTy())
1518     return true;
1519 
1520   if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1521       ScalarTy->isIntegerTy(32))
1522     return true;
1523 
1524   if (ScalarTy->isIntegerTy(64))
1525     return Subtarget.hasVInstructionsI64();
1526 
1527   if (ScalarTy->isHalfTy())
1528     return Subtarget.hasVInstructionsF16();
1529   if (ScalarTy->isFloatTy())
1530     return Subtarget.hasVInstructionsF32();
1531   if (ScalarTy->isDoubleTy())
1532     return Subtarget.hasVInstructionsF64();
1533 
1534   return false;
1535 }
1536 
1537 static bool useRVVForFixedLengthVectorVT(MVT VT,
1538                                          const RISCVSubtarget &Subtarget) {
1539   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1540   if (!Subtarget.useRVVForFixedLengthVectors())
1541     return false;
1542 
1543   // We only support a set of vector types with a consistent maximum fixed size
1544   // across all supported vector element types to avoid legalization issues.
1545   // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1546   // fixed-length vector type we support is 1024 bytes.
1547   if (VT.getFixedSizeInBits() > 1024 * 8)
1548     return false;
1549 
1550   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1551 
1552   MVT EltVT = VT.getVectorElementType();
1553 
1554   // Don't use RVV for vectors we cannot scalarize if required.
1555   switch (EltVT.SimpleTy) {
1556   // i1 is supported but has different rules.
1557   default:
1558     return false;
1559   case MVT::i1:
1560     // Masks can only use a single register.
1561     if (VT.getVectorNumElements() > MinVLen)
1562       return false;
1563     MinVLen /= 8;
1564     break;
1565   case MVT::i8:
1566   case MVT::i16:
1567   case MVT::i32:
1568     break;
1569   case MVT::i64:
1570     if (!Subtarget.hasVInstructionsI64())
1571       return false;
1572     break;
1573   case MVT::f16:
1574     if (!Subtarget.hasVInstructionsF16())
1575       return false;
1576     break;
1577   case MVT::f32:
1578     if (!Subtarget.hasVInstructionsF32())
1579       return false;
1580     break;
1581   case MVT::f64:
1582     if (!Subtarget.hasVInstructionsF64())
1583       return false;
1584     break;
1585   }
1586 
1587   // Reject elements larger than ELEN.
1588   if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1589     return false;
1590 
1591   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1592   // Don't use RVV for types that don't fit.
1593   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1594     return false;
1595 
1596   // TODO: Perhaps an artificial restriction, but worth having whilst getting
1597   // the base fixed length RVV support in place.
1598   if (!VT.isPow2VectorType())
1599     return false;
1600 
1601   return true;
1602 }
1603 
1604 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1605   return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1606 }
1607 
1608 // Return the largest legal scalable vector type that matches VT's element type.
1609 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
1610                                             const RISCVSubtarget &Subtarget) {
1611   // This may be called before legal types are setup.
1612   assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1613           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1614          "Expected legal fixed length vector!");
1615 
1616   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1617   unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1618 
1619   MVT EltVT = VT.getVectorElementType();
1620   switch (EltVT.SimpleTy) {
1621   default:
1622     llvm_unreachable("unexpected element type for RVV container");
1623   case MVT::i1:
1624   case MVT::i8:
1625   case MVT::i16:
1626   case MVT::i32:
1627   case MVT::i64:
1628   case MVT::f16:
1629   case MVT::f32:
1630   case MVT::f64: {
1631     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1632     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1633     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
1634     unsigned NumElts =
1635         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1636     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1637     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1638     return MVT::getScalableVectorVT(EltVT, NumElts);
1639   }
1640   }
1641 }
1642 
1643 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1644                                             const RISCVSubtarget &Subtarget) {
1645   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1646                                           Subtarget);
1647 }
1648 
1649 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1650   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
1651 }
1652 
1653 // Grow V to consume an entire RVV register.
1654 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1655                                        const RISCVSubtarget &Subtarget) {
1656   assert(VT.isScalableVector() &&
1657          "Expected to convert into a scalable vector!");
1658   assert(V.getValueType().isFixedLengthVector() &&
1659          "Expected a fixed length vector operand!");
1660   SDLoc DL(V);
1661   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1662   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1663 }
1664 
1665 // Shrink V so it's just big enough to maintain a VT's worth of data.
1666 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1667                                          const RISCVSubtarget &Subtarget) {
1668   assert(VT.isFixedLengthVector() &&
1669          "Expected to convert into a fixed length vector!");
1670   assert(V.getValueType().isScalableVector() &&
1671          "Expected a scalable vector operand!");
1672   SDLoc DL(V);
1673   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1674   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1675 }
1676 
1677 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1678 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1679 // the vector type that it is contained in.
1680 static std::pair<SDValue, SDValue>
1681 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1682                 const RISCVSubtarget &Subtarget) {
1683   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1684   MVT XLenVT = Subtarget.getXLenVT();
1685   SDValue VL = VecVT.isFixedLengthVector()
1686                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1687                    : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1688   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1689   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1690   return {Mask, VL};
1691 }
1692 
1693 // As above but assuming the given type is a scalable vector type.
1694 static std::pair<SDValue, SDValue>
1695 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1696                         const RISCVSubtarget &Subtarget) {
1697   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1698   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1699 }
1700 
1701 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1702 // of either is (currently) supported. This can get us into an infinite loop
1703 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1704 // as a ..., etc.
1705 // Until either (or both) of these can reliably lower any node, reporting that
1706 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1707 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1708 // which is not desirable.
1709 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1710     EVT VT, unsigned DefinedValues) const {
1711   return false;
1712 }
1713 
1714 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1715   // Only splats are currently supported.
1716   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1717     return true;
1718 
1719   return false;
1720 }
1721 
1722 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1723                                   const RISCVSubtarget &Subtarget) {
1724   // RISCV FP-to-int conversions saturate to the destination register size, but
1725   // don't produce 0 for nan. We can use a conversion instruction and fix the
1726   // nan case with a compare and a select.
1727   SDValue Src = Op.getOperand(0);
1728 
1729   EVT DstVT = Op.getValueType();
1730   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1731 
1732   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1733   unsigned Opc;
1734   if (SatVT == DstVT)
1735     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1736   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1737     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1738   else
1739     return SDValue();
1740   // FIXME: Support other SatVTs by clamping before or after the conversion.
1741 
1742   SDLoc DL(Op);
1743   SDValue FpToInt = DAG.getNode(
1744       Opc, DL, DstVT, Src,
1745       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1746 
1747   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1748   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1749 }
1750 
1751 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1752 // and back. Taking care to avoid converting values that are nan or already
1753 // correct.
1754 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1755 // have FRM dependencies modeled yet.
1756 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
1757   MVT VT = Op.getSimpleValueType();
1758   assert(VT.isVector() && "Unexpected type");
1759 
1760   SDLoc DL(Op);
1761 
1762   // Freeze the source since we are increasing the number of uses.
1763   SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
1764 
1765   // Truncate to integer and convert back to FP.
1766   MVT IntVT = VT.changeVectorElementTypeToInteger();
1767   SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
1768   Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1769 
1770   MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1771 
1772   if (Op.getOpcode() == ISD::FCEIL) {
1773     // If the truncated value is the greater than or equal to the original
1774     // value, we've computed the ceil. Otherwise, we went the wrong way and
1775     // need to increase by 1.
1776     // FIXME: This should use a masked operation. Handle here or in isel?
1777     SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
1778                                  DAG.getConstantFP(1.0, DL, VT));
1779     SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
1780     Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1781   } else if (Op.getOpcode() == ISD::FFLOOR) {
1782     // If the truncated value is the less than or equal to the original value,
1783     // we've computed the floor. Otherwise, we went the wrong way and need to
1784     // decrease by 1.
1785     // FIXME: This should use a masked operation. Handle here or in isel?
1786     SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
1787                                  DAG.getConstantFP(1.0, DL, VT));
1788     SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
1789     Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1790   }
1791 
1792   // Restore the original sign so that -0.0 is preserved.
1793   Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1794 
1795   // Determine the largest integer that can be represented exactly. This and
1796   // values larger than it don't have any fractional bits so don't need to
1797   // be converted.
1798   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1799   unsigned Precision = APFloat::semanticsPrecision(FltSem);
1800   APFloat MaxVal = APFloat(FltSem);
1801   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1802                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1803   SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1804 
1805   // If abs(Src) was larger than MaxVal or nan, keep it.
1806   SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1807   SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1808   return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1809 }
1810 
1811 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1812                                  const RISCVSubtarget &Subtarget) {
1813   MVT VT = Op.getSimpleValueType();
1814   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1815 
1816   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1817 
1818   SDLoc DL(Op);
1819   SDValue Mask, VL;
1820   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1821 
1822   unsigned Opc =
1823       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1824   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1825   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1826 }
1827 
1828 struct VIDSequence {
1829   int64_t StepNumerator;
1830   unsigned StepDenominator;
1831   int64_t Addend;
1832 };
1833 
1834 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1835 // to the (non-zero) step S and start value X. This can be then lowered as the
1836 // RVV sequence (VID * S) + X, for example.
1837 // The step S is represented as an integer numerator divided by a positive
1838 // denominator. Note that the implementation currently only identifies
1839 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1840 // cannot detect 2/3, for example.
1841 // Note that this method will also match potentially unappealing index
1842 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
1843 // determine whether this is worth generating code for.
1844 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
1845   unsigned NumElts = Op.getNumOperands();
1846   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1847   if (!Op.getValueType().isInteger())
1848     return None;
1849 
1850   Optional<unsigned> SeqStepDenom;
1851   Optional<int64_t> SeqStepNum, SeqAddend;
1852   Optional<std::pair<uint64_t, unsigned>> PrevElt;
1853   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1854   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1855     // Assume undef elements match the sequence; we just have to be careful
1856     // when interpolating across them.
1857     if (Op.getOperand(Idx).isUndef())
1858       continue;
1859     // The BUILD_VECTOR must be all constants.
1860     if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1861       return None;
1862 
1863     uint64_t Val = Op.getConstantOperandVal(Idx) &
1864                    maskTrailingOnes<uint64_t>(EltSizeInBits);
1865 
1866     if (PrevElt) {
1867       // Calculate the step since the last non-undef element, and ensure
1868       // it's consistent across the entire sequence.
1869       unsigned IdxDiff = Idx - PrevElt->second;
1870       int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1871 
1872       // A zero-value value difference means that we're somewhere in the middle
1873       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1874       // step change before evaluating the sequence.
1875       if (ValDiff != 0) {
1876         int64_t Remainder = ValDiff % IdxDiff;
1877         // Normalize the step if it's greater than 1.
1878         if (Remainder != ValDiff) {
1879           // The difference must cleanly divide the element span.
1880           if (Remainder != 0)
1881             return None;
1882           ValDiff /= IdxDiff;
1883           IdxDiff = 1;
1884         }
1885 
1886         if (!SeqStepNum)
1887           SeqStepNum = ValDiff;
1888         else if (ValDiff != SeqStepNum)
1889           return None;
1890 
1891         if (!SeqStepDenom)
1892           SeqStepDenom = IdxDiff;
1893         else if (IdxDiff != *SeqStepDenom)
1894           return None;
1895       }
1896     }
1897 
1898     // Record and/or check any addend.
1899     if (SeqStepNum && SeqStepDenom) {
1900       uint64_t ExpectedVal =
1901           (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1902       int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1903       if (!SeqAddend)
1904         SeqAddend = Addend;
1905       else if (SeqAddend != Addend)
1906         return None;
1907     }
1908 
1909     // Record this non-undef element for later.
1910     if (!PrevElt || PrevElt->first != Val)
1911       PrevElt = std::make_pair(Val, Idx);
1912   }
1913   // We need to have logged both a step and an addend for this to count as
1914   // a legal index sequence.
1915   if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1916     return None;
1917 
1918   return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1919 }
1920 
1921 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1922                                  const RISCVSubtarget &Subtarget) {
1923   MVT VT = Op.getSimpleValueType();
1924   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1925 
1926   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1927 
1928   SDLoc DL(Op);
1929   SDValue Mask, VL;
1930   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1931 
1932   MVT XLenVT = Subtarget.getXLenVT();
1933   unsigned NumElts = Op.getNumOperands();
1934 
1935   if (VT.getVectorElementType() == MVT::i1) {
1936     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1937       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1938       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1939     }
1940 
1941     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1942       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1943       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1944     }
1945 
1946     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1947     // scalar integer chunks whose bit-width depends on the number of mask
1948     // bits and XLEN.
1949     // First, determine the most appropriate scalar integer type to use. This
1950     // is at most XLenVT, but may be shrunk to a smaller vector element type
1951     // according to the size of the final vector - use i8 chunks rather than
1952     // XLenVT if we're producing a v8i1. This results in more consistent
1953     // codegen across RV32 and RV64.
1954     unsigned NumViaIntegerBits =
1955         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1956     NumViaIntegerBits = std::min(NumViaIntegerBits,
1957                                  Subtarget.getMaxELENForFixedLengthVectors());
1958     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1959       // If we have to use more than one INSERT_VECTOR_ELT then this
1960       // optimization is likely to increase code size; avoid peforming it in
1961       // such a case. We can use a load from a constant pool in this case.
1962       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1963         return SDValue();
1964       // Now we can create our integer vector type. Note that it may be larger
1965       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1966       MVT IntegerViaVecVT =
1967           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1968                            divideCeil(NumElts, NumViaIntegerBits));
1969 
1970       uint64_t Bits = 0;
1971       unsigned BitPos = 0, IntegerEltIdx = 0;
1972       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1973 
1974       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1975         // Once we accumulate enough bits to fill our scalar type, insert into
1976         // our vector and clear our accumulated data.
1977         if (I != 0 && I % NumViaIntegerBits == 0) {
1978           if (NumViaIntegerBits <= 32)
1979             Bits = SignExtend64(Bits, 32);
1980           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1981           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1982                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1983           Bits = 0;
1984           BitPos = 0;
1985           IntegerEltIdx++;
1986         }
1987         SDValue V = Op.getOperand(I);
1988         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1989         Bits |= ((uint64_t)BitValue << BitPos);
1990       }
1991 
1992       // Insert the (remaining) scalar value into position in our integer
1993       // vector type.
1994       if (NumViaIntegerBits <= 32)
1995         Bits = SignExtend64(Bits, 32);
1996       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1997       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1998                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1999 
2000       if (NumElts < NumViaIntegerBits) {
2001         // If we're producing a smaller vector than our minimum legal integer
2002         // type, bitcast to the equivalent (known-legal) mask type, and extract
2003         // our final mask.
2004         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2005         Vec = DAG.getBitcast(MVT::v8i1, Vec);
2006         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2007                           DAG.getConstant(0, DL, XLenVT));
2008       } else {
2009         // Else we must have produced an integer type with the same size as the
2010         // mask type; bitcast for the final result.
2011         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2012         Vec = DAG.getBitcast(VT, Vec);
2013       }
2014 
2015       return Vec;
2016     }
2017 
2018     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2019     // vector type, we have a legal equivalently-sized i8 type, so we can use
2020     // that.
2021     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2022     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2023 
2024     SDValue WideVec;
2025     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2026       // For a splat, perform a scalar truncate before creating the wider
2027       // vector.
2028       assert(Splat.getValueType() == XLenVT &&
2029              "Unexpected type for i1 splat value");
2030       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2031                           DAG.getConstant(1, DL, XLenVT));
2032       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2033     } else {
2034       SmallVector<SDValue, 8> Ops(Op->op_values());
2035       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2036       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2037       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2038     }
2039 
2040     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2041   }
2042 
2043   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2044     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2045                                         : RISCVISD::VMV_V_X_VL;
2046     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
2047     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2048   }
2049 
2050   // Try and match index sequences, which we can lower to the vid instruction
2051   // with optional modifications. An all-undef vector is matched by
2052   // getSplatValue, above.
2053   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2054     int64_t StepNumerator = SimpleVID->StepNumerator;
2055     unsigned StepDenominator = SimpleVID->StepDenominator;
2056     int64_t Addend = SimpleVID->Addend;
2057 
2058     assert(StepNumerator != 0 && "Invalid step");
2059     bool Negate = false;
2060     int64_t SplatStepVal = StepNumerator;
2061     unsigned StepOpcode = ISD::MUL;
2062     if (StepNumerator != 1) {
2063       if (isPowerOf2_64(std::abs(StepNumerator))) {
2064         Negate = StepNumerator < 0;
2065         StepOpcode = ISD::SHL;
2066         SplatStepVal = Log2_64(std::abs(StepNumerator));
2067       }
2068     }
2069 
2070     // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
2071     // threshold since it's the immediate value many RVV instructions accept.
2072     // There is no vmul.vi instruction so ensure multiply constant can fit in
2073     // a single addi instruction.
2074     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2075          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2076         isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
2077       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2078       // Convert right out of the scalable type so we can use standard ISD
2079       // nodes for the rest of the computation. If we used scalable types with
2080       // these, we'd lose the fixed-length vector info and generate worse
2081       // vsetvli code.
2082       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2083       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2084           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2085         SDValue SplatStep = DAG.getSplatVector(
2086             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2087         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2088       }
2089       if (StepDenominator != 1) {
2090         SDValue SplatStep = DAG.getSplatVector(
2091             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2092         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2093       }
2094       if (Addend != 0 || Negate) {
2095         SDValue SplatAddend =
2096             DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2097         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2098       }
2099       return VID;
2100     }
2101   }
2102 
2103   // Attempt to detect "hidden" splats, which only reveal themselves as splats
2104   // when re-interpreted as a vector with a larger element type. For example,
2105   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2106   // could be instead splat as
2107   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
2108   // TODO: This optimization could also work on non-constant splats, but it
2109   // would require bit-manipulation instructions to construct the splat value.
2110   SmallVector<SDValue> Sequence;
2111   unsigned EltBitSize = VT.getScalarSizeInBits();
2112   const auto *BV = cast<BuildVectorSDNode>(Op);
2113   if (VT.isInteger() && EltBitSize < 64 &&
2114       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2115       BV->getRepeatedSequence(Sequence) &&
2116       (Sequence.size() * EltBitSize) <= 64) {
2117     unsigned SeqLen = Sequence.size();
2118     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2119     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2120     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2121             ViaIntVT == MVT::i64) &&
2122            "Unexpected sequence type");
2123 
2124     unsigned EltIdx = 0;
2125     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2126     uint64_t SplatValue = 0;
2127     // Construct the amalgamated value which can be splatted as this larger
2128     // vector type.
2129     for (const auto &SeqV : Sequence) {
2130       if (!SeqV.isUndef())
2131         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2132                        << (EltIdx * EltBitSize));
2133       EltIdx++;
2134     }
2135 
2136     // On RV64, sign-extend from 32 to 64 bits where possible in order to
2137     // achieve better constant materializion.
2138     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2139       SplatValue = SignExtend64(SplatValue, 32);
2140 
2141     // Since we can't introduce illegal i64 types at this stage, we can only
2142     // perform an i64 splat on RV32 if it is its own sign-extended value. That
2143     // way we can use RVV instructions to splat.
2144     assert((ViaIntVT.bitsLE(XLenVT) ||
2145             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2146            "Unexpected bitcast sequence");
2147     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2148       SDValue ViaVL =
2149           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2150       MVT ViaContainerVT =
2151           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2152       SDValue Splat =
2153           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2154                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2155       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2156       return DAG.getBitcast(VT, Splat);
2157     }
2158   }
2159 
2160   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2161   // which constitute a large proportion of the elements. In such cases we can
2162   // splat a vector with the dominant element and make up the shortfall with
2163   // INSERT_VECTOR_ELTs.
2164   // Note that this includes vectors of 2 elements by association. The
2165   // upper-most element is the "dominant" one, allowing us to use a splat to
2166   // "insert" the upper element, and an insert of the lower element at position
2167   // 0, which improves codegen.
2168   SDValue DominantValue;
2169   unsigned MostCommonCount = 0;
2170   DenseMap<SDValue, unsigned> ValueCounts;
2171   unsigned NumUndefElts =
2172       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2173 
2174   // Track the number of scalar loads we know we'd be inserting, estimated as
2175   // any non-zero floating-point constant. Other kinds of element are either
2176   // already in registers or are materialized on demand. The threshold at which
2177   // a vector load is more desirable than several scalar materializion and
2178   // vector-insertion instructions is not known.
2179   unsigned NumScalarLoads = 0;
2180 
2181   for (SDValue V : Op->op_values()) {
2182     if (V.isUndef())
2183       continue;
2184 
2185     ValueCounts.insert(std::make_pair(V, 0));
2186     unsigned &Count = ValueCounts[V];
2187 
2188     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2189       NumScalarLoads += !CFP->isExactlyValue(+0.0);
2190 
2191     // Is this value dominant? In case of a tie, prefer the highest element as
2192     // it's cheaper to insert near the beginning of a vector than it is at the
2193     // end.
2194     if (++Count >= MostCommonCount) {
2195       DominantValue = V;
2196       MostCommonCount = Count;
2197     }
2198   }
2199 
2200   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2201   unsigned NumDefElts = NumElts - NumUndefElts;
2202   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2203 
2204   // Don't perform this optimization when optimizing for size, since
2205   // materializing elements and inserting them tends to cause code bloat.
2206   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2207       ((MostCommonCount > DominantValueCountThreshold) ||
2208        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2209     // Start by splatting the most common element.
2210     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2211 
2212     DenseSet<SDValue> Processed{DominantValue};
2213     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2214     for (const auto &OpIdx : enumerate(Op->ops())) {
2215       const SDValue &V = OpIdx.value();
2216       if (V.isUndef() || !Processed.insert(V).second)
2217         continue;
2218       if (ValueCounts[V] == 1) {
2219         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2220                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
2221       } else {
2222         // Blend in all instances of this value using a VSELECT, using a
2223         // mask where each bit signals whether that element is the one
2224         // we're after.
2225         SmallVector<SDValue> Ops;
2226         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2227           return DAG.getConstant(V == V1, DL, XLenVT);
2228         });
2229         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2230                           DAG.getBuildVector(SelMaskTy, DL, Ops),
2231                           DAG.getSplatBuildVector(VT, DL, V), Vec);
2232       }
2233     }
2234 
2235     return Vec;
2236   }
2237 
2238   return SDValue();
2239 }
2240 
2241 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
2242                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
2243   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2244     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2245     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2246     // If Hi constant is all the same sign bit as Lo, lower this as a custom
2247     // node in order to try and match RVV vector/scalar instructions.
2248     if ((LoC >> 31) == HiC)
2249       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2250 
2251     // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
2252     // vmv.v.x whose EEW = 32 to lower it.
2253     auto *Const = dyn_cast<ConstantSDNode>(VL);
2254     if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
2255       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2256       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2257       // access the subtarget here now.
2258       auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
2259       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2260     }
2261   }
2262 
2263   // Fall back to a stack store and stride x0 vector load.
2264   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
2265 }
2266 
2267 // Called by type legalization to handle splat of i64 on RV32.
2268 // FIXME: We can optimize this when the type has sign or zero bits in one
2269 // of the halves.
2270 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2271                                    SDValue VL, SelectionDAG &DAG) {
2272   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2273   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2274                            DAG.getConstant(0, DL, MVT::i32));
2275   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2276                            DAG.getConstant(1, DL, MVT::i32));
2277   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2278 }
2279 
2280 // This function lowers a splat of a scalar operand Splat with the vector
2281 // length VL. It ensures the final sequence is type legal, which is useful when
2282 // lowering a splat after type legalization.
2283 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
2284                                 SelectionDAG &DAG,
2285                                 const RISCVSubtarget &Subtarget) {
2286   if (VT.isFloatingPoint()) {
2287     // If VL is 1, we could use vfmv.s.f.
2288     if (isOneConstant(VL))
2289       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
2290                          Scalar, VL);
2291     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
2292   }
2293 
2294   MVT XLenVT = Subtarget.getXLenVT();
2295 
2296   // Simplest case is that the operand needs to be promoted to XLenVT.
2297   if (Scalar.getValueType().bitsLE(XLenVT)) {
2298     // If the operand is a constant, sign extend to increase our chances
2299     // of being able to use a .vi instruction. ANY_EXTEND would become a
2300     // a zero extend and the simm5 check in isel would fail.
2301     // FIXME: Should we ignore the upper bits in isel instead?
2302     unsigned ExtOpc =
2303         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2304     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2305     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2306     // If VL is 1 and the scalar value won't benefit from immediate, we could
2307     // use vmv.s.x.
2308     if (isOneConstant(VL) &&
2309         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2310       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
2311                          VL);
2312     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
2313   }
2314 
2315   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2316          "Unexpected scalar for splat lowering!");
2317 
2318   if (isOneConstant(VL) && isNullConstant(Scalar))
2319     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
2320                        DAG.getConstant(0, DL, XLenVT), VL);
2321 
2322   // Otherwise use the more complicated splatting algorithm.
2323   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2324 }
2325 
2326 // Is the mask a slidedown that shifts in undefs.
2327 static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
2328   int Size = Mask.size();
2329 
2330   // Elements shifted in should be undef.
2331   auto CheckUndefs = [&](int Shift) {
2332     for (int i = Size - Shift; i != Size; ++i)
2333       if (Mask[i] >= 0)
2334         return false;
2335     return true;
2336   };
2337 
2338   // Elements should be shifted or undef.
2339   auto MatchShift = [&](int Shift) {
2340     for (int i = 0; i != Size - Shift; ++i)
2341        if (Mask[i] >= 0 && Mask[i] != Shift + i)
2342          return false;
2343     return true;
2344   };
2345 
2346   // Try all possible shifts.
2347   for (int Shift = 1; Shift != Size; ++Shift)
2348     if (CheckUndefs(Shift) && MatchShift(Shift))
2349       return Shift;
2350 
2351   // No match.
2352   return -1;
2353 }
2354 
2355 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2356                                 const RISCVSubtarget &Subtarget) {
2357   // We need to be able to widen elements to the next larger integer type.
2358   if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
2359     return false;
2360 
2361   int Size = Mask.size();
2362   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2363 
2364   int Srcs[] = {-1, -1};
2365   for (int i = 0; i != Size; ++i) {
2366     // Ignore undef elements.
2367     if (Mask[i] < 0)
2368       continue;
2369 
2370     // Is this an even or odd element.
2371     int Pol = i % 2;
2372 
2373     // Ensure we consistently use the same source for this element polarity.
2374     int Src = Mask[i] / Size;
2375     if (Srcs[Pol] < 0)
2376       Srcs[Pol] = Src;
2377     if (Srcs[Pol] != Src)
2378       return false;
2379 
2380     // Make sure the element within the source is appropriate for this element
2381     // in the destination.
2382     int Elt = Mask[i] % Size;
2383     if (Elt != i / 2)
2384       return false;
2385   }
2386 
2387   // We need to find a source for each polarity and they can't be the same.
2388   if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2389     return false;
2390 
2391   // Swap the sources if the second source was in the even polarity.
2392   SwapSources = Srcs[0] > Srcs[1];
2393 
2394   return true;
2395 }
2396 
2397 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2398                                    const RISCVSubtarget &Subtarget) {
2399   SDValue V1 = Op.getOperand(0);
2400   SDValue V2 = Op.getOperand(1);
2401   SDLoc DL(Op);
2402   MVT XLenVT = Subtarget.getXLenVT();
2403   MVT VT = Op.getSimpleValueType();
2404   unsigned NumElts = VT.getVectorNumElements();
2405   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2406 
2407   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2408 
2409   SDValue TrueMask, VL;
2410   std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2411 
2412   if (SVN->isSplat()) {
2413     const int Lane = SVN->getSplatIndex();
2414     if (Lane >= 0) {
2415       MVT SVT = VT.getVectorElementType();
2416 
2417       // Turn splatted vector load into a strided load with an X0 stride.
2418       SDValue V = V1;
2419       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2420       // with undef.
2421       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2422       int Offset = Lane;
2423       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2424         int OpElements =
2425             V.getOperand(0).getSimpleValueType().getVectorNumElements();
2426         V = V.getOperand(Offset / OpElements);
2427         Offset %= OpElements;
2428       }
2429 
2430       // We need to ensure the load isn't atomic or volatile.
2431       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2432         auto *Ld = cast<LoadSDNode>(V);
2433         Offset *= SVT.getStoreSize();
2434         SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2435                                                    TypeSize::Fixed(Offset), DL);
2436 
2437         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2438         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2439           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2440           SDValue IntID =
2441               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2442           SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
2443                            DAG.getRegister(RISCV::X0, XLenVT), VL};
2444           SDValue NewLoad = DAG.getMemIntrinsicNode(
2445               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2446               DAG.getMachineFunction().getMachineMemOperand(
2447                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2448           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2449           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2450         }
2451 
2452         // Otherwise use a scalar load and splat. This will give the best
2453         // opportunity to fold a splat into the operation. ISel can turn it into
2454         // the x0 strided load if we aren't able to fold away the select.
2455         if (SVT.isFloatingPoint())
2456           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2457                           Ld->getPointerInfo().getWithOffset(Offset),
2458                           Ld->getOriginalAlign(),
2459                           Ld->getMemOperand()->getFlags());
2460         else
2461           V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2462                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
2463                              Ld->getOriginalAlign(),
2464                              Ld->getMemOperand()->getFlags());
2465         DAG.makeEquivalentMemoryOrdering(Ld, V);
2466 
2467         unsigned Opc =
2468             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2469         SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2470         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2471       }
2472 
2473       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2474       assert(Lane < (int)NumElts && "Unexpected lane!");
2475       SDValue Gather =
2476           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2477                       DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2478       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2479     }
2480   }
2481 
2482   ArrayRef<int> Mask = SVN->getMask();
2483 
2484   // Try to match as a slidedown.
2485   int SlideAmt = matchShuffleAsSlideDown(Mask);
2486   if (SlideAmt >= 0) {
2487     // TODO: Should we reduce the VL to account for the upper undef elements?
2488     // Requires additional vsetvlis, but might be faster to execute.
2489     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2490     SDValue SlideDown =
2491         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2492                     DAG.getUNDEF(ContainerVT), V1,
2493                     DAG.getConstant(SlideAmt, DL, XLenVT),
2494                     TrueMask, VL);
2495     return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
2496   }
2497 
2498   // Detect an interleave shuffle and lower to
2499   // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
2500   bool SwapSources;
2501   if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2502     // Swap sources if needed.
2503     if (SwapSources)
2504       std::swap(V1, V2);
2505 
2506     // Extract the lower half of the vectors.
2507     MVT HalfVT = VT.getHalfNumVectorElementsVT();
2508     V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2509                      DAG.getConstant(0, DL, XLenVT));
2510     V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2511                      DAG.getConstant(0, DL, XLenVT));
2512 
2513     // Double the element width and halve the number of elements in an int type.
2514     unsigned EltBits = VT.getScalarSizeInBits();
2515     MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2516     MVT WideIntVT =
2517         MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2518     // Convert this to a scalable vector. We need to base this on the
2519     // destination size to ensure there's always a type with a smaller LMUL.
2520     MVT WideIntContainerVT =
2521         getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2522 
2523     // Convert sources to scalable vectors with the same element count as the
2524     // larger type.
2525     MVT HalfContainerVT = MVT::getVectorVT(
2526         VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2527     V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2528     V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2529 
2530     // Cast sources to integer.
2531     MVT IntEltVT = MVT::getIntegerVT(EltBits);
2532     MVT IntHalfVT =
2533         MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2534     V1 = DAG.getBitcast(IntHalfVT, V1);
2535     V2 = DAG.getBitcast(IntHalfVT, V2);
2536 
2537     // Freeze V2 since we use it twice and we need to be sure that the add and
2538     // multiply see the same value.
2539     V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
2540 
2541     // Recreate TrueMask using the widened type's element count.
2542     MVT MaskVT =
2543         MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
2544     TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2545 
2546     // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2547     SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
2548                               V2, TrueMask, VL);
2549     // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2550     SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2551                                      DAG.getAllOnesConstant(DL, XLenVT));
2552     SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
2553                                    V2, Multiplier, TrueMask, VL);
2554     // Add the new copies to our previous addition giving us 2^eltbits copies of
2555     // V2. This is equivalent to shifting V2 left by eltbits. This should
2556     // combine with the vwmulu.vv above to form vwmaccu.vv.
2557     Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2558                       TrueMask, VL);
2559     // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
2560     // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
2561     // vector VT.
2562     ContainerVT =
2563         MVT::getVectorVT(VT.getVectorElementType(),
2564                          WideIntContainerVT.getVectorElementCount() * 2);
2565     Add = DAG.getBitcast(ContainerVT, Add);
2566     return convertFromScalableVector(VT, Add, DAG, Subtarget);
2567   }
2568 
2569   // Detect shuffles which can be re-expressed as vector selects; these are
2570   // shuffles in which each element in the destination is taken from an element
2571   // at the corresponding index in either source vectors.
2572   bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2573     int MaskIndex = MaskIdx.value();
2574     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2575   });
2576 
2577   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2578 
2579   SmallVector<SDValue> MaskVals;
2580   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2581   // merged with a second vrgather.
2582   SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2583 
2584   // By default we preserve the original operand order, and use a mask to
2585   // select LHS as true and RHS as false. However, since RVV vector selects may
2586   // feature splats but only on the LHS, we may choose to invert our mask and
2587   // instead select between RHS and LHS.
2588   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2589   bool InvertMask = IsSelect == SwapOps;
2590 
2591   // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
2592   // half.
2593   DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2594 
2595   // Now construct the mask that will be used by the vselect or blended
2596   // vrgather operation. For vrgathers, construct the appropriate indices into
2597   // each vector.
2598   for (int MaskIndex : Mask) {
2599     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2600     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2601     if (!IsSelect) {
2602       bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2603       GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2604                                      ? DAG.getConstant(MaskIndex, DL, XLenVT)
2605                                      : DAG.getUNDEF(XLenVT));
2606       GatherIndicesRHS.push_back(
2607           IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2608                             : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2609       if (IsLHSOrUndefIndex && MaskIndex >= 0)
2610         ++LHSIndexCounts[MaskIndex];
2611       if (!IsLHSOrUndefIndex)
2612         ++RHSIndexCounts[MaskIndex - NumElts];
2613     }
2614   }
2615 
2616   if (SwapOps) {
2617     std::swap(V1, V2);
2618     std::swap(GatherIndicesLHS, GatherIndicesRHS);
2619   }
2620 
2621   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2622   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2623   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2624 
2625   if (IsSelect)
2626     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2627 
2628   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2629     // On such a large vector we're unable to use i8 as the index type.
2630     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2631     // may involve vector splitting if we're already at LMUL=8, or our
2632     // user-supplied maximum fixed-length LMUL.
2633     return SDValue();
2634   }
2635 
2636   unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2637   unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2638   MVT IndexVT = VT.changeTypeToInteger();
2639   // Since we can't introduce illegal index types at this stage, use i16 and
2640   // vrgatherei16 if the corresponding index type for plain vrgather is greater
2641   // than XLenVT.
2642   if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2643     GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2644     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2645   }
2646 
2647   MVT IndexContainerVT =
2648       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2649 
2650   SDValue Gather;
2651   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2652   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2653   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2654     Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2655   } else {
2656     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2657     // If only one index is used, we can use a "splat" vrgather.
2658     // TODO: We can splat the most-common index and fix-up any stragglers, if
2659     // that's beneficial.
2660     if (LHSIndexCounts.size() == 1) {
2661       int SplatIndex = LHSIndexCounts.begin()->getFirst();
2662       Gather =
2663           DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2664                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2665     } else {
2666       SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2667       LHSIndices =
2668           convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2669 
2670       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2671                            TrueMask, VL);
2672     }
2673   }
2674 
2675   // If a second vector operand is used by this shuffle, blend it in with an
2676   // additional vrgather.
2677   if (!V2.isUndef()) {
2678     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2679     // If only one index is used, we can use a "splat" vrgather.
2680     // TODO: We can splat the most-common index and fix-up any stragglers, if
2681     // that's beneficial.
2682     if (RHSIndexCounts.size() == 1) {
2683       int SplatIndex = RHSIndexCounts.begin()->getFirst();
2684       V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2685                        DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2686     } else {
2687       SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2688       RHSIndices =
2689           convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2690       V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2691                        VL);
2692     }
2693 
2694     MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2695     SelectMask =
2696         convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2697 
2698     Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2699                          Gather, VL);
2700   }
2701 
2702   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2703 }
2704 
2705 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2706                                      SDLoc DL, SelectionDAG &DAG,
2707                                      const RISCVSubtarget &Subtarget) {
2708   if (VT.isScalableVector())
2709     return DAG.getFPExtendOrRound(Op, DL, VT);
2710   assert(VT.isFixedLengthVector() &&
2711          "Unexpected value type for RVV FP extend/round lowering");
2712   SDValue Mask, VL;
2713   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2714   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2715                         ? RISCVISD::FP_EXTEND_VL
2716                         : RISCVISD::FP_ROUND_VL;
2717   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2718 }
2719 
2720 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2721 // the exponent.
2722 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2723   MVT VT = Op.getSimpleValueType();
2724   unsigned EltSize = VT.getScalarSizeInBits();
2725   SDValue Src = Op.getOperand(0);
2726   SDLoc DL(Op);
2727 
2728   // We need a FP type that can represent the value.
2729   // TODO: Use f16 for i8 when possible?
2730   MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2731   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2732 
2733   // Legal types should have been checked in the RISCVTargetLowering
2734   // constructor.
2735   // TODO: Splitting may make sense in some cases.
2736   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2737          "Expected legal float type!");
2738 
2739   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2740   // The trailing zero count is equal to log2 of this single bit value.
2741   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2742     SDValue Neg =
2743         DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2744     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2745   }
2746 
2747   // We have a legal FP type, convert to it.
2748   SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2749   // Bitcast to integer and shift the exponent to the LSB.
2750   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2751   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2752   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2753   SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2754                               DAG.getConstant(ShiftAmt, DL, IntVT));
2755   // Truncate back to original type to allow vnsrl.
2756   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2757   // The exponent contains log2 of the value in biased form.
2758   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2759 
2760   // For trailing zeros, we just need to subtract the bias.
2761   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2762     return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2763                        DAG.getConstant(ExponentBias, DL, VT));
2764 
2765   // For leading zeros, we need to remove the bias and convert from log2 to
2766   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
2767   unsigned Adjust = ExponentBias + (EltSize - 1);
2768   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2769 }
2770 
2771 // While RVV has alignment restrictions, we should always be able to load as a
2772 // legal equivalently-sized byte-typed vector instead. This method is
2773 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2774 // the load is already correctly-aligned, it returns SDValue().
2775 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2776                                                     SelectionDAG &DAG) const {
2777   auto *Load = cast<LoadSDNode>(Op);
2778   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2779 
2780   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2781                                      Load->getMemoryVT(),
2782                                      *Load->getMemOperand()))
2783     return SDValue();
2784 
2785   SDLoc DL(Op);
2786   MVT VT = Op.getSimpleValueType();
2787   unsigned EltSizeBits = VT.getScalarSizeInBits();
2788   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2789          "Unexpected unaligned RVV load type");
2790   MVT NewVT =
2791       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2792   assert(NewVT.isValid() &&
2793          "Expecting equally-sized RVV vector types to be legal");
2794   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2795                           Load->getPointerInfo(), Load->getOriginalAlign(),
2796                           Load->getMemOperand()->getFlags());
2797   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2798 }
2799 
2800 // While RVV has alignment restrictions, we should always be able to store as a
2801 // legal equivalently-sized byte-typed vector instead. This method is
2802 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
2803 // returns SDValue() if the store is already correctly aligned.
2804 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2805                                                      SelectionDAG &DAG) const {
2806   auto *Store = cast<StoreSDNode>(Op);
2807   assert(Store && Store->getValue().getValueType().isVector() &&
2808          "Expected vector store");
2809 
2810   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2811                                      Store->getMemoryVT(),
2812                                      *Store->getMemOperand()))
2813     return SDValue();
2814 
2815   SDLoc DL(Op);
2816   SDValue StoredVal = Store->getValue();
2817   MVT VT = StoredVal.getSimpleValueType();
2818   unsigned EltSizeBits = VT.getScalarSizeInBits();
2819   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2820          "Unexpected unaligned RVV store type");
2821   MVT NewVT =
2822       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2823   assert(NewVT.isValid() &&
2824          "Expecting equally-sized RVV vector types to be legal");
2825   StoredVal = DAG.getBitcast(NewVT, StoredVal);
2826   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2827                       Store->getPointerInfo(), Store->getOriginalAlign(),
2828                       Store->getMemOperand()->getFlags());
2829 }
2830 
2831 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2832                                             SelectionDAG &DAG) const {
2833   switch (Op.getOpcode()) {
2834   default:
2835     report_fatal_error("unimplemented operand");
2836   case ISD::GlobalAddress:
2837     return lowerGlobalAddress(Op, DAG);
2838   case ISD::BlockAddress:
2839     return lowerBlockAddress(Op, DAG);
2840   case ISD::ConstantPool:
2841     return lowerConstantPool(Op, DAG);
2842   case ISD::JumpTable:
2843     return lowerJumpTable(Op, DAG);
2844   case ISD::GlobalTLSAddress:
2845     return lowerGlobalTLSAddress(Op, DAG);
2846   case ISD::SELECT:
2847     return lowerSELECT(Op, DAG);
2848   case ISD::BRCOND:
2849     return lowerBRCOND(Op, DAG);
2850   case ISD::VASTART:
2851     return lowerVASTART(Op, DAG);
2852   case ISD::FRAMEADDR:
2853     return lowerFRAMEADDR(Op, DAG);
2854   case ISD::RETURNADDR:
2855     return lowerRETURNADDR(Op, DAG);
2856   case ISD::SHL_PARTS:
2857     return lowerShiftLeftParts(Op, DAG);
2858   case ISD::SRA_PARTS:
2859     return lowerShiftRightParts(Op, DAG, true);
2860   case ISD::SRL_PARTS:
2861     return lowerShiftRightParts(Op, DAG, false);
2862   case ISD::BITCAST: {
2863     SDLoc DL(Op);
2864     EVT VT = Op.getValueType();
2865     SDValue Op0 = Op.getOperand(0);
2866     EVT Op0VT = Op0.getValueType();
2867     MVT XLenVT = Subtarget.getXLenVT();
2868     if (VT.isFixedLengthVector()) {
2869       // We can handle fixed length vector bitcasts with a simple replacement
2870       // in isel.
2871       if (Op0VT.isFixedLengthVector())
2872         return Op;
2873       // When bitcasting from scalar to fixed-length vector, insert the scalar
2874       // into a one-element vector of the result type, and perform a vector
2875       // bitcast.
2876       if (!Op0VT.isVector()) {
2877         EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2878         if (!isTypeLegal(BVT))
2879           return SDValue();
2880         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2881                                               DAG.getUNDEF(BVT), Op0,
2882                                               DAG.getConstant(0, DL, XLenVT)));
2883       }
2884       return SDValue();
2885     }
2886     // Custom-legalize bitcasts from fixed-length vector types to scalar types
2887     // thus: bitcast the vector to a one-element vector type whose element type
2888     // is the same as the result type, and extract the first element.
2889     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2890       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
2891       if (!isTypeLegal(BVT))
2892         return SDValue();
2893       SDValue BVec = DAG.getBitcast(BVT, Op0);
2894       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2895                          DAG.getConstant(0, DL, XLenVT));
2896     }
2897     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2898       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2899       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2900       return FPConv;
2901     }
2902     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2903         Subtarget.hasStdExtF()) {
2904       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2905       SDValue FPConv =
2906           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2907       return FPConv;
2908     }
2909     return SDValue();
2910   }
2911   case ISD::INTRINSIC_WO_CHAIN:
2912     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2913   case ISD::INTRINSIC_W_CHAIN:
2914     return LowerINTRINSIC_W_CHAIN(Op, DAG);
2915   case ISD::INTRINSIC_VOID:
2916     return LowerINTRINSIC_VOID(Op, DAG);
2917   case ISD::BSWAP:
2918   case ISD::BITREVERSE: {
2919     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
2920     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2921     MVT VT = Op.getSimpleValueType();
2922     SDLoc DL(Op);
2923     // Start with the maximum immediate value which is the bitwidth - 1.
2924     unsigned Imm = VT.getSizeInBits() - 1;
2925     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2926     if (Op.getOpcode() == ISD::BSWAP)
2927       Imm &= ~0x7U;
2928     return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2929                        DAG.getConstant(Imm, DL, VT));
2930   }
2931   case ISD::FSHL:
2932   case ISD::FSHR: {
2933     MVT VT = Op.getSimpleValueType();
2934     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2935     SDLoc DL(Op);
2936     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2937     // use log(XLen) bits. Mask the shift amount accordingly to prevent
2938     // accidentally setting the extra bit.
2939     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2940     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2941                                 DAG.getConstant(ShAmtWidth, DL, VT));
2942     // fshl and fshr concatenate their operands in the same order. fsr and fsl
2943     // instruction use different orders. fshl will return its first operand for
2944     // shift of zero, fshr will return its second operand. fsl and fsr both
2945     // return rs1 so the ISD nodes need to have different operand orders.
2946     // Shift amount is in rs2.
2947     SDValue Op0 = Op.getOperand(0);
2948     SDValue Op1 = Op.getOperand(1);
2949     unsigned Opc = RISCVISD::FSL;
2950     if (Op.getOpcode() == ISD::FSHR) {
2951       std::swap(Op0, Op1);
2952       Opc = RISCVISD::FSR;
2953     }
2954     return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
2955   }
2956   case ISD::TRUNCATE: {
2957     SDLoc DL(Op);
2958     MVT VT = Op.getSimpleValueType();
2959     // Only custom-lower vector truncates
2960     if (!VT.isVector())
2961       return Op;
2962 
2963     // Truncates to mask types are handled differently
2964     if (VT.getVectorElementType() == MVT::i1)
2965       return lowerVectorMaskTrunc(Op, DAG);
2966 
2967     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2968     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2969     // truncate by one power of two at a time.
2970     MVT DstEltVT = VT.getVectorElementType();
2971 
2972     SDValue Src = Op.getOperand(0);
2973     MVT SrcVT = Src.getSimpleValueType();
2974     MVT SrcEltVT = SrcVT.getVectorElementType();
2975 
2976     assert(DstEltVT.bitsLT(SrcEltVT) &&
2977            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2978            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2979            "Unexpected vector truncate lowering");
2980 
2981     MVT ContainerVT = SrcVT;
2982     if (SrcVT.isFixedLengthVector()) {
2983       ContainerVT = getContainerForFixedLengthVector(SrcVT);
2984       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2985     }
2986 
2987     SDValue Result = Src;
2988     SDValue Mask, VL;
2989     std::tie(Mask, VL) =
2990         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2991     LLVMContext &Context = *DAG.getContext();
2992     const ElementCount Count = ContainerVT.getVectorElementCount();
2993     do {
2994       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2995       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2996       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2997                            Mask, VL);
2998     } while (SrcEltVT != DstEltVT);
2999 
3000     if (SrcVT.isFixedLengthVector())
3001       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3002 
3003     return Result;
3004   }
3005   case ISD::ANY_EXTEND:
3006   case ISD::ZERO_EXTEND:
3007     if (Op.getOperand(0).getValueType().isVector() &&
3008         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3009       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3010     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3011   case ISD::SIGN_EXTEND:
3012     if (Op.getOperand(0).getValueType().isVector() &&
3013         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3014       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3015     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3016   case ISD::SPLAT_VECTOR_PARTS:
3017     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3018   case ISD::INSERT_VECTOR_ELT:
3019     return lowerINSERT_VECTOR_ELT(Op, DAG);
3020   case ISD::EXTRACT_VECTOR_ELT:
3021     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3022   case ISD::VSCALE: {
3023     MVT VT = Op.getSimpleValueType();
3024     SDLoc DL(Op);
3025     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3026     // We define our scalable vector types for lmul=1 to use a 64 bit known
3027     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3028     // vscale as VLENB / 8.
3029     static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3030     if (isa<ConstantSDNode>(Op.getOperand(0))) {
3031       // We assume VLENB is a multiple of 8. We manually choose the best shift
3032       // here because SimplifyDemandedBits isn't always able to simplify it.
3033       uint64_t Val = Op.getConstantOperandVal(0);
3034       if (isPowerOf2_64(Val)) {
3035         uint64_t Log2 = Log2_64(Val);
3036         if (Log2 < 3)
3037           return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3038                              DAG.getConstant(3 - Log2, DL, VT));
3039         if (Log2 > 3)
3040           return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3041                              DAG.getConstant(Log2 - 3, DL, VT));
3042         return VLENB;
3043       }
3044       // If the multiplier is a multiple of 8, scale it down to avoid needing
3045       // to shift the VLENB value.
3046       if ((Val % 8) == 0)
3047         return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3048                            DAG.getConstant(Val / 8, DL, VT));
3049     }
3050 
3051     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3052                                  DAG.getConstant(3, DL, VT));
3053     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3054   }
3055   case ISD::FPOWI: {
3056     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3057     // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3058     if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3059         Op.getOperand(1).getValueType() == MVT::i32) {
3060       SDLoc DL(Op);
3061       SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3062       SDValue Powi =
3063           DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3064       return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3065                          DAG.getIntPtrConstant(0, DL));
3066     }
3067     return SDValue();
3068   }
3069   case ISD::FP_EXTEND: {
3070     // RVV can only do fp_extend to types double the size as the source. We
3071     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
3072     // via f32.
3073     SDLoc DL(Op);
3074     MVT VT = Op.getSimpleValueType();
3075     SDValue Src = Op.getOperand(0);
3076     MVT SrcVT = Src.getSimpleValueType();
3077 
3078     // Prepare any fixed-length vector operands.
3079     MVT ContainerVT = VT;
3080     if (SrcVT.isFixedLengthVector()) {
3081       ContainerVT = getContainerForFixedLengthVector(VT);
3082       MVT SrcContainerVT =
3083           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
3084       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3085     }
3086 
3087     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
3088         SrcVT.getVectorElementType() != MVT::f16) {
3089       // For scalable vectors, we only need to close the gap between
3090       // vXf16->vXf64.
3091       if (!VT.isFixedLengthVector())
3092         return Op;
3093       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
3094       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3095       return convertFromScalableVector(VT, Src, DAG, Subtarget);
3096     }
3097 
3098     MVT InterVT = VT.changeVectorElementType(MVT::f32);
3099     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
3100     SDValue IntermediateExtend = getRVVFPExtendOrRound(
3101         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
3102 
3103     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
3104                                            DL, DAG, Subtarget);
3105     if (VT.isFixedLengthVector())
3106       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
3107     return Extend;
3108   }
3109   case ISD::FP_ROUND: {
3110     // RVV can only do fp_round to types half the size as the source. We
3111     // custom-lower f64->f16 rounds via RVV's round-to-odd float
3112     // conversion instruction.
3113     SDLoc DL(Op);
3114     MVT VT = Op.getSimpleValueType();
3115     SDValue Src = Op.getOperand(0);
3116     MVT SrcVT = Src.getSimpleValueType();
3117 
3118     // Prepare any fixed-length vector operands.
3119     MVT ContainerVT = VT;
3120     if (VT.isFixedLengthVector()) {
3121       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3122       ContainerVT =
3123           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3124       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3125     }
3126 
3127     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
3128         SrcVT.getVectorElementType() != MVT::f64) {
3129       // For scalable vectors, we only need to close the gap between
3130       // vXf64<->vXf16.
3131       if (!VT.isFixedLengthVector())
3132         return Op;
3133       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
3134       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3135       return convertFromScalableVector(VT, Src, DAG, Subtarget);
3136     }
3137 
3138     SDValue Mask, VL;
3139     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3140 
3141     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
3142     SDValue IntermediateRound =
3143         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
3144     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
3145                                           DL, DAG, Subtarget);
3146 
3147     if (VT.isFixedLengthVector())
3148       return convertFromScalableVector(VT, Round, DAG, Subtarget);
3149     return Round;
3150   }
3151   case ISD::FP_TO_SINT:
3152   case ISD::FP_TO_UINT:
3153   case ISD::SINT_TO_FP:
3154   case ISD::UINT_TO_FP: {
3155     // RVV can only do fp<->int conversions to types half/double the size as
3156     // the source. We custom-lower any conversions that do two hops into
3157     // sequences.
3158     MVT VT = Op.getSimpleValueType();
3159     if (!VT.isVector())
3160       return Op;
3161     SDLoc DL(Op);
3162     SDValue Src = Op.getOperand(0);
3163     MVT EltVT = VT.getVectorElementType();
3164     MVT SrcVT = Src.getSimpleValueType();
3165     MVT SrcEltVT = SrcVT.getVectorElementType();
3166     unsigned EltSize = EltVT.getSizeInBits();
3167     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3168     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3169            "Unexpected vector element types");
3170 
3171     bool IsInt2FP = SrcEltVT.isInteger();
3172     // Widening conversions
3173     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
3174       if (IsInt2FP) {
3175         // Do a regular integer sign/zero extension then convert to float.
3176         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
3177                                       VT.getVectorElementCount());
3178         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3179                                  ? ISD::ZERO_EXTEND
3180                                  : ISD::SIGN_EXTEND;
3181         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3182         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3183       }
3184       // FP2Int
3185       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3186       // Do one doubling fp_extend then complete the operation by converting
3187       // to int.
3188       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3189       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3190       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3191     }
3192 
3193     // Narrowing conversions
3194     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
3195       if (IsInt2FP) {
3196         // One narrowing int_to_fp, then an fp_round.
3197         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3198         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3199         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3200         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3201       }
3202       // FP2Int
3203       // One narrowing fp_to_int, then truncate the integer. If the float isn't
3204       // representable by the integer, the result is poison.
3205       MVT IVecVT =
3206           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
3207                            VT.getVectorElementCount());
3208       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3209       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3210     }
3211 
3212     // Scalable vectors can exit here. Patterns will handle equally-sized
3213     // conversions halving/doubling ones.
3214     if (!VT.isFixedLengthVector())
3215       return Op;
3216 
3217     // For fixed-length vectors we lower to a custom "VL" node.
3218     unsigned RVVOpc = 0;
3219     switch (Op.getOpcode()) {
3220     default:
3221       llvm_unreachable("Impossible opcode");
3222     case ISD::FP_TO_SINT:
3223       RVVOpc = RISCVISD::FP_TO_SINT_VL;
3224       break;
3225     case ISD::FP_TO_UINT:
3226       RVVOpc = RISCVISD::FP_TO_UINT_VL;
3227       break;
3228     case ISD::SINT_TO_FP:
3229       RVVOpc = RISCVISD::SINT_TO_FP_VL;
3230       break;
3231     case ISD::UINT_TO_FP:
3232       RVVOpc = RISCVISD::UINT_TO_FP_VL;
3233       break;
3234     }
3235 
3236     MVT ContainerVT, SrcContainerVT;
3237     // Derive the reference container type from the larger vector type.
3238     if (SrcEltSize > EltSize) {
3239       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3240       ContainerVT =
3241           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3242     } else {
3243       ContainerVT = getContainerForFixedLengthVector(VT);
3244       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3245     }
3246 
3247     SDValue Mask, VL;
3248     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3249 
3250     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3251     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3252     return convertFromScalableVector(VT, Src, DAG, Subtarget);
3253   }
3254   case ISD::FP_TO_SINT_SAT:
3255   case ISD::FP_TO_UINT_SAT:
3256     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3257   case ISD::FTRUNC:
3258   case ISD::FCEIL:
3259   case ISD::FFLOOR:
3260     return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
3261   case ISD::VECREDUCE_ADD:
3262   case ISD::VECREDUCE_UMAX:
3263   case ISD::VECREDUCE_SMAX:
3264   case ISD::VECREDUCE_UMIN:
3265   case ISD::VECREDUCE_SMIN:
3266     return lowerVECREDUCE(Op, DAG);
3267   case ISD::VECREDUCE_AND:
3268   case ISD::VECREDUCE_OR:
3269   case ISD::VECREDUCE_XOR:
3270     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3271       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3272     return lowerVECREDUCE(Op, DAG);
3273   case ISD::VECREDUCE_FADD:
3274   case ISD::VECREDUCE_SEQ_FADD:
3275   case ISD::VECREDUCE_FMIN:
3276   case ISD::VECREDUCE_FMAX:
3277     return lowerFPVECREDUCE(Op, DAG);
3278   case ISD::VP_REDUCE_ADD:
3279   case ISD::VP_REDUCE_UMAX:
3280   case ISD::VP_REDUCE_SMAX:
3281   case ISD::VP_REDUCE_UMIN:
3282   case ISD::VP_REDUCE_SMIN:
3283   case ISD::VP_REDUCE_FADD:
3284   case ISD::VP_REDUCE_SEQ_FADD:
3285   case ISD::VP_REDUCE_FMIN:
3286   case ISD::VP_REDUCE_FMAX:
3287     return lowerVPREDUCE(Op, DAG);
3288   case ISD::VP_REDUCE_AND:
3289   case ISD::VP_REDUCE_OR:
3290   case ISD::VP_REDUCE_XOR:
3291     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3292       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3293     return lowerVPREDUCE(Op, DAG);
3294   case ISD::INSERT_SUBVECTOR:
3295     return lowerINSERT_SUBVECTOR(Op, DAG);
3296   case ISD::EXTRACT_SUBVECTOR:
3297     return lowerEXTRACT_SUBVECTOR(Op, DAG);
3298   case ISD::STEP_VECTOR:
3299     return lowerSTEP_VECTOR(Op, DAG);
3300   case ISD::VECTOR_REVERSE:
3301     return lowerVECTOR_REVERSE(Op, DAG);
3302   case ISD::BUILD_VECTOR:
3303     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3304   case ISD::SPLAT_VECTOR:
3305     if (Op.getValueType().getVectorElementType() == MVT::i1)
3306       return lowerVectorMaskSplat(Op, DAG);
3307     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
3308   case ISD::VECTOR_SHUFFLE:
3309     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3310   case ISD::CONCAT_VECTORS: {
3311     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3312     // better than going through the stack, as the default expansion does.
3313     SDLoc DL(Op);
3314     MVT VT = Op.getSimpleValueType();
3315     unsigned NumOpElts =
3316         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3317     SDValue Vec = DAG.getUNDEF(VT);
3318     for (const auto &OpIdx : enumerate(Op->ops())) {
3319       SDValue SubVec = OpIdx.value();
3320       // Don't insert undef subvectors.
3321       if (SubVec.isUndef())
3322         continue;
3323       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3324                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3325     }
3326     return Vec;
3327   }
3328   case ISD::LOAD:
3329     if (auto V = expandUnalignedRVVLoad(Op, DAG))
3330       return V;
3331     if (Op.getValueType().isFixedLengthVector())
3332       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3333     return Op;
3334   case ISD::STORE:
3335     if (auto V = expandUnalignedRVVStore(Op, DAG))
3336       return V;
3337     if (Op.getOperand(1).getValueType().isFixedLengthVector())
3338       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3339     return Op;
3340   case ISD::MLOAD:
3341   case ISD::VP_LOAD:
3342     return lowerMaskedLoad(Op, DAG);
3343   case ISD::MSTORE:
3344   case ISD::VP_STORE:
3345     return lowerMaskedStore(Op, DAG);
3346   case ISD::SETCC:
3347     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3348   case ISD::ADD:
3349     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
3350   case ISD::SUB:
3351     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
3352   case ISD::MUL:
3353     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
3354   case ISD::MULHS:
3355     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
3356   case ISD::MULHU:
3357     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
3358   case ISD::AND:
3359     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3360                                               RISCVISD::AND_VL);
3361   case ISD::OR:
3362     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3363                                               RISCVISD::OR_VL);
3364   case ISD::XOR:
3365     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3366                                               RISCVISD::XOR_VL);
3367   case ISD::SDIV:
3368     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
3369   case ISD::SREM:
3370     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
3371   case ISD::UDIV:
3372     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
3373   case ISD::UREM:
3374     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
3375   case ISD::SHL:
3376   case ISD::SRA:
3377   case ISD::SRL:
3378     if (Op.getSimpleValueType().isFixedLengthVector())
3379       return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3380     // This can be called for an i32 shift amount that needs to be promoted.
3381     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3382            "Unexpected custom legalisation");
3383     return SDValue();
3384   case ISD::SADDSAT:
3385     return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
3386   case ISD::UADDSAT:
3387     return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
3388   case ISD::SSUBSAT:
3389     return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
3390   case ISD::USUBSAT:
3391     return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
3392   case ISD::FADD:
3393     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
3394   case ISD::FSUB:
3395     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
3396   case ISD::FMUL:
3397     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
3398   case ISD::FDIV:
3399     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
3400   case ISD::FNEG:
3401     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3402   case ISD::FABS:
3403     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3404   case ISD::FSQRT:
3405     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3406   case ISD::FMA:
3407     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
3408   case ISD::SMIN:
3409     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
3410   case ISD::SMAX:
3411     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
3412   case ISD::UMIN:
3413     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
3414   case ISD::UMAX:
3415     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
3416   case ISD::FMINNUM:
3417     return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
3418   case ISD::FMAXNUM:
3419     return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
3420   case ISD::ABS:
3421     return lowerABS(Op, DAG);
3422   case ISD::CTLZ_ZERO_UNDEF:
3423   case ISD::CTTZ_ZERO_UNDEF:
3424     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3425   case ISD::VSELECT:
3426     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3427   case ISD::FCOPYSIGN:
3428     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3429   case ISD::MGATHER:
3430   case ISD::VP_GATHER:
3431     return lowerMaskedGather(Op, DAG);
3432   case ISD::MSCATTER:
3433   case ISD::VP_SCATTER:
3434     return lowerMaskedScatter(Op, DAG);
3435   case ISD::FLT_ROUNDS_:
3436     return lowerGET_ROUNDING(Op, DAG);
3437   case ISD::SET_ROUNDING:
3438     return lowerSET_ROUNDING(Op, DAG);
3439   case ISD::VP_SELECT:
3440     return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3441   case ISD::VP_ADD:
3442     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
3443   case ISD::VP_SUB:
3444     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
3445   case ISD::VP_MUL:
3446     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
3447   case ISD::VP_SDIV:
3448     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
3449   case ISD::VP_UDIV:
3450     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
3451   case ISD::VP_SREM:
3452     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
3453   case ISD::VP_UREM:
3454     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
3455   case ISD::VP_AND:
3456     return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3457   case ISD::VP_OR:
3458     return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3459   case ISD::VP_XOR:
3460     return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3461   case ISD::VP_ASHR:
3462     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
3463   case ISD::VP_LSHR:
3464     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
3465   case ISD::VP_SHL:
3466     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
3467   case ISD::VP_FADD:
3468     return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
3469   case ISD::VP_FSUB:
3470     return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
3471   case ISD::VP_FMUL:
3472     return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
3473   case ISD::VP_FDIV:
3474     return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
3475   }
3476 }
3477 
3478 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3479                              SelectionDAG &DAG, unsigned Flags) {
3480   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3481 }
3482 
3483 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3484                              SelectionDAG &DAG, unsigned Flags) {
3485   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3486                                    Flags);
3487 }
3488 
3489 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3490                              SelectionDAG &DAG, unsigned Flags) {
3491   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3492                                    N->getOffset(), Flags);
3493 }
3494 
3495 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3496                              SelectionDAG &DAG, unsigned Flags) {
3497   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3498 }
3499 
3500 template <class NodeTy>
3501 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3502                                      bool IsLocal) const {
3503   SDLoc DL(N);
3504   EVT Ty = getPointerTy(DAG.getDataLayout());
3505 
3506   if (isPositionIndependent()) {
3507     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3508     if (IsLocal)
3509       // Use PC-relative addressing to access the symbol. This generates the
3510       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3511       // %pcrel_lo(auipc)).
3512       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3513 
3514     // Use PC-relative addressing to access the GOT for this symbol, then load
3515     // the address from the GOT. This generates the pattern (PseudoLA sym),
3516     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3517     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
3518   }
3519 
3520   switch (getTargetMachine().getCodeModel()) {
3521   default:
3522     report_fatal_error("Unsupported code model for lowering");
3523   case CodeModel::Small: {
3524     // Generate a sequence for accessing addresses within the first 2 GiB of
3525     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
3526     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3527     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3528     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3529     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
3530   }
3531   case CodeModel::Medium: {
3532     // Generate a sequence for accessing addresses within any 2GiB range within
3533     // the address space. This generates the pattern (PseudoLLA sym), which
3534     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3535     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3536     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3537   }
3538   }
3539 }
3540 
3541 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3542                                                 SelectionDAG &DAG) const {
3543   SDLoc DL(Op);
3544   EVT Ty = Op.getValueType();
3545   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3546   int64_t Offset = N->getOffset();
3547   MVT XLenVT = Subtarget.getXLenVT();
3548 
3549   const GlobalValue *GV = N->getGlobal();
3550   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3551   SDValue Addr = getAddr(N, DAG, IsLocal);
3552 
3553   // In order to maximise the opportunity for common subexpression elimination,
3554   // emit a separate ADD node for the global address offset instead of folding
3555   // it in the global address node. Later peephole optimisations may choose to
3556   // fold it back in when profitable.
3557   if (Offset != 0)
3558     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3559                        DAG.getConstant(Offset, DL, XLenVT));
3560   return Addr;
3561 }
3562 
3563 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3564                                                SelectionDAG &DAG) const {
3565   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3566 
3567   return getAddr(N, DAG);
3568 }
3569 
3570 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3571                                                SelectionDAG &DAG) const {
3572   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3573 
3574   return getAddr(N, DAG);
3575 }
3576 
3577 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3578                                             SelectionDAG &DAG) const {
3579   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3580 
3581   return getAddr(N, DAG);
3582 }
3583 
3584 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3585                                               SelectionDAG &DAG,
3586                                               bool UseGOT) const {
3587   SDLoc DL(N);
3588   EVT Ty = getPointerTy(DAG.getDataLayout());
3589   const GlobalValue *GV = N->getGlobal();
3590   MVT XLenVT = Subtarget.getXLenVT();
3591 
3592   if (UseGOT) {
3593     // Use PC-relative addressing to access the GOT for this TLS symbol, then
3594     // load the address from the GOT and add the thread pointer. This generates
3595     // the pattern (PseudoLA_TLS_IE sym), which expands to
3596     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3597     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3598     SDValue Load =
3599         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3600 
3601     // Add the thread pointer.
3602     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3603     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3604   }
3605 
3606   // Generate a sequence for accessing the address relative to the thread
3607   // pointer, with the appropriate adjustment for the thread pointer offset.
3608   // This generates the pattern
3609   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3610   SDValue AddrHi =
3611       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3612   SDValue AddrAdd =
3613       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3614   SDValue AddrLo =
3615       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3616 
3617   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3618   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3619   SDValue MNAdd = SDValue(
3620       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3621       0);
3622   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3623 }
3624 
3625 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3626                                                SelectionDAG &DAG) const {
3627   SDLoc DL(N);
3628   EVT Ty = getPointerTy(DAG.getDataLayout());
3629   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3630   const GlobalValue *GV = N->getGlobal();
3631 
3632   // Use a PC-relative addressing mode to access the global dynamic GOT address.
3633   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3634   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3635   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3636   SDValue Load =
3637       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3638 
3639   // Prepare argument list to generate call.
3640   ArgListTy Args;
3641   ArgListEntry Entry;
3642   Entry.Node = Load;
3643   Entry.Ty = CallTy;
3644   Args.push_back(Entry);
3645 
3646   // Setup call to __tls_get_addr.
3647   TargetLowering::CallLoweringInfo CLI(DAG);
3648   CLI.setDebugLoc(DL)
3649       .setChain(DAG.getEntryNode())
3650       .setLibCallee(CallingConv::C, CallTy,
3651                     DAG.getExternalSymbol("__tls_get_addr", Ty),
3652                     std::move(Args));
3653 
3654   return LowerCallTo(CLI).first;
3655 }
3656 
3657 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3658                                                    SelectionDAG &DAG) const {
3659   SDLoc DL(Op);
3660   EVT Ty = Op.getValueType();
3661   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3662   int64_t Offset = N->getOffset();
3663   MVT XLenVT = Subtarget.getXLenVT();
3664 
3665   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3666 
3667   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3668       CallingConv::GHC)
3669     report_fatal_error("In GHC calling convention TLS is not supported");
3670 
3671   SDValue Addr;
3672   switch (Model) {
3673   case TLSModel::LocalExec:
3674     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3675     break;
3676   case TLSModel::InitialExec:
3677     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3678     break;
3679   case TLSModel::LocalDynamic:
3680   case TLSModel::GeneralDynamic:
3681     Addr = getDynamicTLSAddr(N, DAG);
3682     break;
3683   }
3684 
3685   // In order to maximise the opportunity for common subexpression elimination,
3686   // emit a separate ADD node for the global address offset instead of folding
3687   // it in the global address node. Later peephole optimisations may choose to
3688   // fold it back in when profitable.
3689   if (Offset != 0)
3690     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3691                        DAG.getConstant(Offset, DL, XLenVT));
3692   return Addr;
3693 }
3694 
3695 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3696   SDValue CondV = Op.getOperand(0);
3697   SDValue TrueV = Op.getOperand(1);
3698   SDValue FalseV = Op.getOperand(2);
3699   SDLoc DL(Op);
3700   MVT VT = Op.getSimpleValueType();
3701   MVT XLenVT = Subtarget.getXLenVT();
3702 
3703   // Lower vector SELECTs to VSELECTs by splatting the condition.
3704   if (VT.isVector()) {
3705     MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3706     SDValue CondSplat = VT.isScalableVector()
3707                             ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3708                             : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3709     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3710   }
3711 
3712   // If the result type is XLenVT and CondV is the output of a SETCC node
3713   // which also operated on XLenVT inputs, then merge the SETCC node into the
3714   // lowered RISCVISD::SELECT_CC to take advantage of the integer
3715   // compare+branch instructions. i.e.:
3716   // (select (setcc lhs, rhs, cc), truev, falsev)
3717   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3718   if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3719       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3720     SDValue LHS = CondV.getOperand(0);
3721     SDValue RHS = CondV.getOperand(1);
3722     const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3723     ISD::CondCode CCVal = CC->get();
3724 
3725     // Special case for a select of 2 constants that have a diffence of 1.
3726     // Normally this is done by DAGCombine, but if the select is introduced by
3727     // type legalization or op legalization, we miss it. Restricting to SETLT
3728     // case for now because that is what signed saturating add/sub need.
3729     // FIXME: We don't need the condition to be SETLT or even a SETCC,
3730     // but we would probably want to swap the true/false values if the condition
3731     // is SETGE/SETLE to avoid an XORI.
3732     if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3733         CCVal == ISD::SETLT) {
3734       const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3735       const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3736       if (TrueVal - 1 == FalseVal)
3737         return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3738       if (TrueVal + 1 == FalseVal)
3739         return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3740     }
3741 
3742     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3743 
3744     SDValue TargetCC = DAG.getCondCode(CCVal);
3745     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3746     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3747   }
3748 
3749   // Otherwise:
3750   // (select condv, truev, falsev)
3751   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3752   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3753   SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3754 
3755   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3756 
3757   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3758 }
3759 
3760 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3761   SDValue CondV = Op.getOperand(1);
3762   SDLoc DL(Op);
3763   MVT XLenVT = Subtarget.getXLenVT();
3764 
3765   if (CondV.getOpcode() == ISD::SETCC &&
3766       CondV.getOperand(0).getValueType() == XLenVT) {
3767     SDValue LHS = CondV.getOperand(0);
3768     SDValue RHS = CondV.getOperand(1);
3769     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3770 
3771     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3772 
3773     SDValue TargetCC = DAG.getCondCode(CCVal);
3774     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3775                        LHS, RHS, TargetCC, Op.getOperand(2));
3776   }
3777 
3778   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3779                      CondV, DAG.getConstant(0, DL, XLenVT),
3780                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3781 }
3782 
3783 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3784   MachineFunction &MF = DAG.getMachineFunction();
3785   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3786 
3787   SDLoc DL(Op);
3788   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3789                                  getPointerTy(MF.getDataLayout()));
3790 
3791   // vastart just stores the address of the VarArgsFrameIndex slot into the
3792   // memory location argument.
3793   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3794   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3795                       MachinePointerInfo(SV));
3796 }
3797 
3798 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3799                                             SelectionDAG &DAG) const {
3800   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3801   MachineFunction &MF = DAG.getMachineFunction();
3802   MachineFrameInfo &MFI = MF.getFrameInfo();
3803   MFI.setFrameAddressIsTaken(true);
3804   Register FrameReg = RI.getFrameRegister(MF);
3805   int XLenInBytes = Subtarget.getXLen() / 8;
3806 
3807   EVT VT = Op.getValueType();
3808   SDLoc DL(Op);
3809   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3810   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3811   while (Depth--) {
3812     int Offset = -(XLenInBytes * 2);
3813     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3814                               DAG.getIntPtrConstant(Offset, DL));
3815     FrameAddr =
3816         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3817   }
3818   return FrameAddr;
3819 }
3820 
3821 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3822                                              SelectionDAG &DAG) const {
3823   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3824   MachineFunction &MF = DAG.getMachineFunction();
3825   MachineFrameInfo &MFI = MF.getFrameInfo();
3826   MFI.setReturnAddressIsTaken(true);
3827   MVT XLenVT = Subtarget.getXLenVT();
3828   int XLenInBytes = Subtarget.getXLen() / 8;
3829 
3830   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3831     return SDValue();
3832 
3833   EVT VT = Op.getValueType();
3834   SDLoc DL(Op);
3835   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3836   if (Depth) {
3837     int Off = -XLenInBytes;
3838     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3839     SDValue Offset = DAG.getConstant(Off, DL, VT);
3840     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3841                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3842                        MachinePointerInfo());
3843   }
3844 
3845   // Return the value of the return address register, marking it an implicit
3846   // live-in.
3847   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3848   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3849 }
3850 
3851 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3852                                                  SelectionDAG &DAG) const {
3853   SDLoc DL(Op);
3854   SDValue Lo = Op.getOperand(0);
3855   SDValue Hi = Op.getOperand(1);
3856   SDValue Shamt = Op.getOperand(2);
3857   EVT VT = Lo.getValueType();
3858 
3859   // if Shamt-XLEN < 0: // Shamt < XLEN
3860   //   Lo = Lo << Shamt
3861   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3862   // else:
3863   //   Lo = 0
3864   //   Hi = Lo << (Shamt-XLEN)
3865 
3866   SDValue Zero = DAG.getConstant(0, DL, VT);
3867   SDValue One = DAG.getConstant(1, DL, VT);
3868   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3869   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3870   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3871   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3872 
3873   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3874   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3875   SDValue ShiftRightLo =
3876       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3877   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3878   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3879   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3880 
3881   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3882 
3883   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3884   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3885 
3886   SDValue Parts[2] = {Lo, Hi};
3887   return DAG.getMergeValues(Parts, DL);
3888 }
3889 
3890 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3891                                                   bool IsSRA) const {
3892   SDLoc DL(Op);
3893   SDValue Lo = Op.getOperand(0);
3894   SDValue Hi = Op.getOperand(1);
3895   SDValue Shamt = Op.getOperand(2);
3896   EVT VT = Lo.getValueType();
3897 
3898   // SRA expansion:
3899   //   if Shamt-XLEN < 0: // Shamt < XLEN
3900   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3901   //     Hi = Hi >>s Shamt
3902   //   else:
3903   //     Lo = Hi >>s (Shamt-XLEN);
3904   //     Hi = Hi >>s (XLEN-1)
3905   //
3906   // SRL expansion:
3907   //   if Shamt-XLEN < 0: // Shamt < XLEN
3908   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3909   //     Hi = Hi >>u Shamt
3910   //   else:
3911   //     Lo = Hi >>u (Shamt-XLEN);
3912   //     Hi = 0;
3913 
3914   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3915 
3916   SDValue Zero = DAG.getConstant(0, DL, VT);
3917   SDValue One = DAG.getConstant(1, DL, VT);
3918   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3919   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3920   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3921   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3922 
3923   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3924   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3925   SDValue ShiftLeftHi =
3926       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3927   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3928   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3929   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3930   SDValue HiFalse =
3931       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3932 
3933   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3934 
3935   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3936   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3937 
3938   SDValue Parts[2] = {Lo, Hi};
3939   return DAG.getMergeValues(Parts, DL);
3940 }
3941 
3942 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3943 // legal equivalently-sized i8 type, so we can use that as a go-between.
3944 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3945                                                   SelectionDAG &DAG) const {
3946   SDLoc DL(Op);
3947   MVT VT = Op.getSimpleValueType();
3948   SDValue SplatVal = Op.getOperand(0);
3949   // All-zeros or all-ones splats are handled specially.
3950   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3951     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3952     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3953   }
3954   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3955     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3956     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3957   }
3958   MVT XLenVT = Subtarget.getXLenVT();
3959   assert(SplatVal.getValueType() == XLenVT &&
3960          "Unexpected type for i1 splat value");
3961   MVT InterVT = VT.changeVectorElementType(MVT::i8);
3962   SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3963                          DAG.getConstant(1, DL, XLenVT));
3964   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3965   SDValue Zero = DAG.getConstant(0, DL, InterVT);
3966   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3967 }
3968 
3969 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3970 // illegal (currently only vXi64 RV32).
3971 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3972 // them to SPLAT_VECTOR_I64
3973 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3974                                                      SelectionDAG &DAG) const {
3975   SDLoc DL(Op);
3976   MVT VecVT = Op.getSimpleValueType();
3977   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3978          "Unexpected SPLAT_VECTOR_PARTS lowering");
3979 
3980   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3981   SDValue Lo = Op.getOperand(0);
3982   SDValue Hi = Op.getOperand(1);
3983 
3984   if (VecVT.isFixedLengthVector()) {
3985     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3986     SDLoc DL(Op);
3987     SDValue Mask, VL;
3988     std::tie(Mask, VL) =
3989         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3990 
3991     SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3992     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3993   }
3994 
3995   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3996     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3997     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3998     // If Hi constant is all the same sign bit as Lo, lower this as a custom
3999     // node in order to try and match RVV vector/scalar instructions.
4000     if ((LoC >> 31) == HiC)
4001       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
4002   }
4003 
4004   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4005   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4006       isa<ConstantSDNode>(Hi.getOperand(1)) &&
4007       Hi.getConstantOperandVal(1) == 31)
4008     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
4009 
4010   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
4011   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
4012                      DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
4013 }
4014 
4015 // Custom-lower extensions from mask vectors by using a vselect either with 1
4016 // for zero/any-extension or -1 for sign-extension:
4017 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
4018 // Note that any-extension is lowered identically to zero-extension.
4019 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
4020                                                 int64_t ExtTrueVal) const {
4021   SDLoc DL(Op);
4022   MVT VecVT = Op.getSimpleValueType();
4023   SDValue Src = Op.getOperand(0);
4024   // Only custom-lower extensions from mask types
4025   assert(Src.getValueType().isVector() &&
4026          Src.getValueType().getVectorElementType() == MVT::i1);
4027 
4028   MVT XLenVT = Subtarget.getXLenVT();
4029   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
4030   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
4031 
4032   if (VecVT.isScalableVector()) {
4033     // Be careful not to introduce illegal scalar types at this stage, and be
4034     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
4035     // illegal and must be expanded. Since we know that the constants are
4036     // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
4037     bool IsRV32E64 =
4038         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
4039 
4040     if (!IsRV32E64) {
4041       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
4042       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
4043     } else {
4044       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
4045       SplatTrueVal =
4046           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
4047     }
4048 
4049     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
4050   }
4051 
4052   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4053   MVT I1ContainerVT =
4054       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4055 
4056   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
4057 
4058   SDValue Mask, VL;
4059   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4060 
4061   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
4062   SplatTrueVal =
4063       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
4064   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
4065                                SplatTrueVal, SplatZero, VL);
4066 
4067   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
4068 }
4069 
4070 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
4071     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
4072   MVT ExtVT = Op.getSimpleValueType();
4073   // Only custom-lower extensions from fixed-length vector types.
4074   if (!ExtVT.isFixedLengthVector())
4075     return Op;
4076   MVT VT = Op.getOperand(0).getSimpleValueType();
4077   // Grab the canonical container type for the extended type. Infer the smaller
4078   // type from that to ensure the same number of vector elements, as we know
4079   // the LMUL will be sufficient to hold the smaller type.
4080   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
4081   // Get the extended container type manually to ensure the same number of
4082   // vector elements between source and dest.
4083   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
4084                                      ContainerExtVT.getVectorElementCount());
4085 
4086   SDValue Op1 =
4087       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4088 
4089   SDLoc DL(Op);
4090   SDValue Mask, VL;
4091   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4092 
4093   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
4094 
4095   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
4096 }
4097 
4098 // Custom-lower truncations from vectors to mask vectors by using a mask and a
4099 // setcc operation:
4100 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
4101 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
4102                                                   SelectionDAG &DAG) const {
4103   SDLoc DL(Op);
4104   EVT MaskVT = Op.getValueType();
4105   // Only expect to custom-lower truncations to mask types
4106   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
4107          "Unexpected type for vector mask lowering");
4108   SDValue Src = Op.getOperand(0);
4109   MVT VecVT = Src.getSimpleValueType();
4110 
4111   // If this is a fixed vector, we need to convert it to a scalable vector.
4112   MVT ContainerVT = VecVT;
4113   if (VecVT.isFixedLengthVector()) {
4114     ContainerVT = getContainerForFixedLengthVector(VecVT);
4115     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4116   }
4117 
4118   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
4119   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4120 
4121   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
4122   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
4123 
4124   if (VecVT.isScalableVector()) {
4125     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
4126     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
4127   }
4128 
4129   SDValue Mask, VL;
4130   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4131 
4132   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4133   SDValue Trunc =
4134       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
4135   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
4136                       DAG.getCondCode(ISD::SETNE), Mask, VL);
4137   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
4138 }
4139 
4140 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
4141 // first position of a vector, and that vector is slid up to the insert index.
4142 // By limiting the active vector length to index+1 and merging with the
4143 // original vector (with an undisturbed tail policy for elements >= VL), we
4144 // achieve the desired result of leaving all elements untouched except the one
4145 // at VL-1, which is replaced with the desired value.
4146 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4147                                                     SelectionDAG &DAG) const {
4148   SDLoc DL(Op);
4149   MVT VecVT = Op.getSimpleValueType();
4150   SDValue Vec = Op.getOperand(0);
4151   SDValue Val = Op.getOperand(1);
4152   SDValue Idx = Op.getOperand(2);
4153 
4154   if (VecVT.getVectorElementType() == MVT::i1) {
4155     // FIXME: For now we just promote to an i8 vector and insert into that,
4156     // but this is probably not optimal.
4157     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4158     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4159     Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
4160     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
4161   }
4162 
4163   MVT ContainerVT = VecVT;
4164   // If the operand is a fixed-length vector, convert to a scalable one.
4165   if (VecVT.isFixedLengthVector()) {
4166     ContainerVT = getContainerForFixedLengthVector(VecVT);
4167     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4168   }
4169 
4170   MVT XLenVT = Subtarget.getXLenVT();
4171 
4172   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4173   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
4174   // Even i64-element vectors on RV32 can be lowered without scalar
4175   // legalization if the most-significant 32 bits of the value are not affected
4176   // by the sign-extension of the lower 32 bits.
4177   // TODO: We could also catch sign extensions of a 32-bit value.
4178   if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
4179     const auto *CVal = cast<ConstantSDNode>(Val);
4180     if (isInt<32>(CVal->getSExtValue())) {
4181       IsLegalInsert = true;
4182       Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
4183     }
4184   }
4185 
4186   SDValue Mask, VL;
4187   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4188 
4189   SDValue ValInVec;
4190 
4191   if (IsLegalInsert) {
4192     unsigned Opc =
4193         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4194     if (isNullConstant(Idx)) {
4195       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
4196       if (!VecVT.isFixedLengthVector())
4197         return Vec;
4198       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
4199     }
4200     ValInVec =
4201         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
4202   } else {
4203     // On RV32, i64-element vectors must be specially handled to place the
4204     // value at element 0, by using two vslide1up instructions in sequence on
4205     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
4206     // this.
4207     SDValue One = DAG.getConstant(1, DL, XLenVT);
4208     SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
4209     SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
4210     MVT I32ContainerVT =
4211         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
4212     SDValue I32Mask =
4213         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
4214     // Limit the active VL to two.
4215     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
4216     // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
4217     // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
4218     ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
4219                            InsertI64VL);
4220     // First slide in the hi value, then the lo in underneath it.
4221     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
4222                            ValHi, I32Mask, InsertI64VL);
4223     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
4224                            ValLo, I32Mask, InsertI64VL);
4225     // Bitcast back to the right container type.
4226     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
4227   }
4228 
4229   // Now that the value is in a vector, slide it into position.
4230   SDValue InsertVL =
4231       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
4232   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
4233                                 ValInVec, Idx, Mask, InsertVL);
4234   if (!VecVT.isFixedLengthVector())
4235     return Slideup;
4236   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
4237 }
4238 
4239 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
4240 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
4241 // types this is done using VMV_X_S to allow us to glean information about the
4242 // sign bits of the result.
4243 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4244                                                      SelectionDAG &DAG) const {
4245   SDLoc DL(Op);
4246   SDValue Idx = Op.getOperand(1);
4247   SDValue Vec = Op.getOperand(0);
4248   EVT EltVT = Op.getValueType();
4249   MVT VecVT = Vec.getSimpleValueType();
4250   MVT XLenVT = Subtarget.getXLenVT();
4251 
4252   if (VecVT.getVectorElementType() == MVT::i1) {
4253     // FIXME: For now we just promote to an i8 vector and extract from that,
4254     // but this is probably not optimal.
4255     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4256     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4257     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
4258   }
4259 
4260   // If this is a fixed vector, we need to convert it to a scalable vector.
4261   MVT ContainerVT = VecVT;
4262   if (VecVT.isFixedLengthVector()) {
4263     ContainerVT = getContainerForFixedLengthVector(VecVT);
4264     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4265   }
4266 
4267   // If the index is 0, the vector is already in the right position.
4268   if (!isNullConstant(Idx)) {
4269     // Use a VL of 1 to avoid processing more elements than we need.
4270     SDValue VL = DAG.getConstant(1, DL, XLenVT);
4271     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4272     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4273     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4274                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
4275   }
4276 
4277   if (!EltVT.isInteger()) {
4278     // Floating-point extracts are handled in TableGen.
4279     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
4280                        DAG.getConstant(0, DL, XLenVT));
4281   }
4282 
4283   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4284   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
4285 }
4286 
4287 // Some RVV intrinsics may claim that they want an integer operand to be
4288 // promoted or expanded.
4289 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
4290                                           const RISCVSubtarget &Subtarget) {
4291   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4292           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
4293          "Unexpected opcode");
4294 
4295   if (!Subtarget.hasVInstructions())
4296     return SDValue();
4297 
4298   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
4299   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
4300   SDLoc DL(Op);
4301 
4302   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
4303       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
4304   if (!II || !II->hasSplatOperand())
4305     return SDValue();
4306 
4307   unsigned SplatOp = II->SplatOperand + 1 + HasChain;
4308   assert(SplatOp < Op.getNumOperands());
4309 
4310   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
4311   SDValue &ScalarOp = Operands[SplatOp];
4312   MVT OpVT = ScalarOp.getSimpleValueType();
4313   MVT XLenVT = Subtarget.getXLenVT();
4314 
4315   // If this isn't a scalar, or its type is XLenVT we're done.
4316   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
4317     return SDValue();
4318 
4319   // Simplest case is that the operand needs to be promoted to XLenVT.
4320   if (OpVT.bitsLT(XLenVT)) {
4321     // If the operand is a constant, sign extend to increase our chances
4322     // of being able to use a .vi instruction. ANY_EXTEND would become a
4323     // a zero extend and the simm5 check in isel would fail.
4324     // FIXME: Should we ignore the upper bits in isel instead?
4325     unsigned ExtOpc =
4326         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4327     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
4328     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4329   }
4330 
4331   // Use the previous operand to get the vXi64 VT. The result might be a mask
4332   // VT for compares. Using the previous operand assumes that the previous
4333   // operand will never have a smaller element size than a scalar operand and
4334   // that a widening operation never uses SEW=64.
4335   // NOTE: If this fails the below assert, we can probably just find the
4336   // element count from any operand or result and use it to construct the VT.
4337   assert(II->SplatOperand > 0 && "Unexpected splat operand!");
4338   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
4339 
4340   // The more complex case is when the scalar is larger than XLenVT.
4341   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
4342          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
4343 
4344   // If this is a sign-extended 32-bit constant, we can truncate it and rely
4345   // on the instruction to sign-extend since SEW>XLEN.
4346   if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
4347     if (isInt<32>(CVal->getSExtValue())) {
4348       ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
4349       return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4350     }
4351   }
4352 
4353   // We need to convert the scalar to a splat vector.
4354   // FIXME: Can we implicitly truncate the scalar if it is known to
4355   // be sign extended?
4356   SDValue VL = Op.getOperand(II->VLOperand + 1 + HasChain);
4357   assert(VL.getValueType() == XLenVT);
4358   ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
4359   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4360 }
4361 
4362 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4363                                                      SelectionDAG &DAG) const {
4364   unsigned IntNo = Op.getConstantOperandVal(0);
4365   SDLoc DL(Op);
4366   MVT XLenVT = Subtarget.getXLenVT();
4367 
4368   switch (IntNo) {
4369   default:
4370     break; // Don't custom lower most intrinsics.
4371   case Intrinsic::thread_pointer: {
4372     EVT PtrVT = getPointerTy(DAG.getDataLayout());
4373     return DAG.getRegister(RISCV::X4, PtrVT);
4374   }
4375   case Intrinsic::riscv_orc_b:
4376     // Lower to the GORCI encoding for orc.b.
4377     return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
4378                        DAG.getConstant(7, DL, XLenVT));
4379   case Intrinsic::riscv_grev:
4380   case Intrinsic::riscv_gorc: {
4381     unsigned Opc =
4382         IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
4383     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4384   }
4385   case Intrinsic::riscv_shfl:
4386   case Intrinsic::riscv_unshfl: {
4387     unsigned Opc =
4388         IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4389     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4390   }
4391   case Intrinsic::riscv_bcompress:
4392   case Intrinsic::riscv_bdecompress: {
4393     unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
4394                                                        : RISCVISD::BDECOMPRESS;
4395     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4396   }
4397   case Intrinsic::riscv_bfp:
4398     return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
4399                        Op.getOperand(2));
4400   case Intrinsic::riscv_fsl:
4401     return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
4402                        Op.getOperand(2), Op.getOperand(3));
4403   case Intrinsic::riscv_fsr:
4404     return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
4405                        Op.getOperand(2), Op.getOperand(3));
4406   case Intrinsic::riscv_vmv_x_s:
4407     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
4408     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
4409                        Op.getOperand(1));
4410   case Intrinsic::riscv_vmv_v_x:
4411     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
4412                             Op.getSimpleValueType(), DL, DAG, Subtarget);
4413   case Intrinsic::riscv_vfmv_v_f:
4414     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
4415                        Op.getOperand(1), Op.getOperand(2));
4416   case Intrinsic::riscv_vmv_s_x: {
4417     SDValue Scalar = Op.getOperand(2);
4418 
4419     if (Scalar.getValueType().bitsLE(XLenVT)) {
4420       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
4421       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
4422                          Op.getOperand(1), Scalar, Op.getOperand(3));
4423     }
4424 
4425     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
4426 
4427     // This is an i64 value that lives in two scalar registers. We have to
4428     // insert this in a convoluted way. First we build vXi64 splat containing
4429     // the/ two values that we assemble using some bit math. Next we'll use
4430     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
4431     // to merge element 0 from our splat into the source vector.
4432     // FIXME: This is probably not the best way to do this, but it is
4433     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
4434     // point.
4435     //   sw lo, (a0)
4436     //   sw hi, 4(a0)
4437     //   vlse vX, (a0)
4438     //
4439     //   vid.v      vVid
4440     //   vmseq.vx   mMask, vVid, 0
4441     //   vmerge.vvm vDest, vSrc, vVal, mMask
4442     MVT VT = Op.getSimpleValueType();
4443     SDValue Vec = Op.getOperand(1);
4444     SDValue VL = Op.getOperand(3);
4445 
4446     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
4447     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
4448                                       DAG.getConstant(0, DL, MVT::i32), VL);
4449 
4450     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
4451     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4452     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
4453     SDValue SelectCond =
4454         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
4455                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
4456     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
4457                        Vec, VL);
4458   }
4459   case Intrinsic::riscv_vslide1up:
4460   case Intrinsic::riscv_vslide1down:
4461   case Intrinsic::riscv_vslide1up_mask:
4462   case Intrinsic::riscv_vslide1down_mask: {
4463     // We need to special case these when the scalar is larger than XLen.
4464     unsigned NumOps = Op.getNumOperands();
4465     bool IsMasked = NumOps == 7;
4466     unsigned OpOffset = IsMasked ? 1 : 0;
4467     SDValue Scalar = Op.getOperand(2 + OpOffset);
4468     if (Scalar.getValueType().bitsLE(XLenVT))
4469       break;
4470 
4471     // Splatting a sign extended constant is fine.
4472     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
4473       if (isInt<32>(CVal->getSExtValue()))
4474         break;
4475 
4476     MVT VT = Op.getSimpleValueType();
4477     assert(VT.getVectorElementType() == MVT::i64 &&
4478            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
4479 
4480     // Convert the vector source to the equivalent nxvXi32 vector.
4481     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4482     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
4483 
4484     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4485                                    DAG.getConstant(0, DL, XLenVT));
4486     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4487                                    DAG.getConstant(1, DL, XLenVT));
4488 
4489     // Double the VL since we halved SEW.
4490     SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
4491     SDValue I32VL =
4492         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
4493 
4494     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
4495     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
4496 
4497     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
4498     // instructions.
4499     if (IntNo == Intrinsic::riscv_vslide1up ||
4500         IntNo == Intrinsic::riscv_vslide1up_mask) {
4501       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
4502                         I32Mask, I32VL);
4503       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
4504                         I32Mask, I32VL);
4505     } else {
4506       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
4507                         I32Mask, I32VL);
4508       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
4509                         I32Mask, I32VL);
4510     }
4511 
4512     // Convert back to nxvXi64.
4513     Vec = DAG.getBitcast(VT, Vec);
4514 
4515     if (!IsMasked)
4516       return Vec;
4517 
4518     // Apply mask after the operation.
4519     SDValue Mask = Op.getOperand(NumOps - 3);
4520     SDValue MaskedOff = Op.getOperand(1);
4521     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
4522   }
4523   }
4524 
4525   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4526 }
4527 
4528 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
4529                                                     SelectionDAG &DAG) const {
4530   unsigned IntNo = Op.getConstantOperandVal(1);
4531   switch (IntNo) {
4532   default:
4533     break;
4534   case Intrinsic::riscv_masked_strided_load: {
4535     SDLoc DL(Op);
4536     MVT XLenVT = Subtarget.getXLenVT();
4537 
4538     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
4539     // the selection of the masked intrinsics doesn't do this for us.
4540     SDValue Mask = Op.getOperand(5);
4541     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4542 
4543     MVT VT = Op->getSimpleValueType(0);
4544     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4545 
4546     SDValue PassThru = Op.getOperand(2);
4547     if (!IsUnmasked) {
4548       MVT MaskVT =
4549           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4550       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4551       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4552     }
4553 
4554     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4555 
4556     SDValue IntID = DAG.getTargetConstant(
4557         IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
4558         XLenVT);
4559 
4560     auto *Load = cast<MemIntrinsicSDNode>(Op);
4561     SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
4562     if (!IsUnmasked)
4563       Ops.push_back(PassThru);
4564     Ops.push_back(Op.getOperand(3)); // Ptr
4565     Ops.push_back(Op.getOperand(4)); // Stride
4566     if (!IsUnmasked)
4567       Ops.push_back(Mask);
4568     Ops.push_back(VL);
4569     if (!IsUnmasked) {
4570       SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
4571       Ops.push_back(Policy);
4572     }
4573 
4574     SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4575     SDValue Result =
4576         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
4577                                 Load->getMemoryVT(), Load->getMemOperand());
4578     SDValue Chain = Result.getValue(1);
4579     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4580     return DAG.getMergeValues({Result, Chain}, DL);
4581   }
4582   }
4583 
4584   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4585 }
4586 
4587 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
4588                                                  SelectionDAG &DAG) const {
4589   unsigned IntNo = Op.getConstantOperandVal(1);
4590   switch (IntNo) {
4591   default:
4592     break;
4593   case Intrinsic::riscv_masked_strided_store: {
4594     SDLoc DL(Op);
4595     MVT XLenVT = Subtarget.getXLenVT();
4596 
4597     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
4598     // the selection of the masked intrinsics doesn't do this for us.
4599     SDValue Mask = Op.getOperand(5);
4600     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4601 
4602     SDValue Val = Op.getOperand(2);
4603     MVT VT = Val.getSimpleValueType();
4604     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4605 
4606     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
4607     if (!IsUnmasked) {
4608       MVT MaskVT =
4609           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4610       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4611     }
4612 
4613     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4614 
4615     SDValue IntID = DAG.getTargetConstant(
4616         IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
4617         XLenVT);
4618 
4619     auto *Store = cast<MemIntrinsicSDNode>(Op);
4620     SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
4621     Ops.push_back(Val);
4622     Ops.push_back(Op.getOperand(3)); // Ptr
4623     Ops.push_back(Op.getOperand(4)); // Stride
4624     if (!IsUnmasked)
4625       Ops.push_back(Mask);
4626     Ops.push_back(VL);
4627 
4628     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
4629                                    Ops, Store->getMemoryVT(),
4630                                    Store->getMemOperand());
4631   }
4632   }
4633 
4634   return SDValue();
4635 }
4636 
4637 static MVT getLMUL1VT(MVT VT) {
4638   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
4639          "Unexpected vector MVT");
4640   return MVT::getScalableVectorVT(
4641       VT.getVectorElementType(),
4642       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
4643 }
4644 
4645 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
4646   switch (ISDOpcode) {
4647   default:
4648     llvm_unreachable("Unhandled reduction");
4649   case ISD::VECREDUCE_ADD:
4650     return RISCVISD::VECREDUCE_ADD_VL;
4651   case ISD::VECREDUCE_UMAX:
4652     return RISCVISD::VECREDUCE_UMAX_VL;
4653   case ISD::VECREDUCE_SMAX:
4654     return RISCVISD::VECREDUCE_SMAX_VL;
4655   case ISD::VECREDUCE_UMIN:
4656     return RISCVISD::VECREDUCE_UMIN_VL;
4657   case ISD::VECREDUCE_SMIN:
4658     return RISCVISD::VECREDUCE_SMIN_VL;
4659   case ISD::VECREDUCE_AND:
4660     return RISCVISD::VECREDUCE_AND_VL;
4661   case ISD::VECREDUCE_OR:
4662     return RISCVISD::VECREDUCE_OR_VL;
4663   case ISD::VECREDUCE_XOR:
4664     return RISCVISD::VECREDUCE_XOR_VL;
4665   }
4666 }
4667 
4668 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
4669                                                          SelectionDAG &DAG,
4670                                                          bool IsVP) const {
4671   SDLoc DL(Op);
4672   SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
4673   MVT VecVT = Vec.getSimpleValueType();
4674   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
4675           Op.getOpcode() == ISD::VECREDUCE_OR ||
4676           Op.getOpcode() == ISD::VECREDUCE_XOR ||
4677           Op.getOpcode() == ISD::VP_REDUCE_AND ||
4678           Op.getOpcode() == ISD::VP_REDUCE_OR ||
4679           Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
4680          "Unexpected reduction lowering");
4681 
4682   MVT XLenVT = Subtarget.getXLenVT();
4683   assert(Op.getValueType() == XLenVT &&
4684          "Expected reduction output to be legalized to XLenVT");
4685 
4686   MVT ContainerVT = VecVT;
4687   if (VecVT.isFixedLengthVector()) {
4688     ContainerVT = getContainerForFixedLengthVector(VecVT);
4689     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4690   }
4691 
4692   SDValue Mask, VL;
4693   if (IsVP) {
4694     Mask = Op.getOperand(2);
4695     VL = Op.getOperand(3);
4696   } else {
4697     std::tie(Mask, VL) =
4698         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4699   }
4700 
4701   unsigned BaseOpc;
4702   ISD::CondCode CC;
4703   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4704 
4705   switch (Op.getOpcode()) {
4706   default:
4707     llvm_unreachable("Unhandled reduction");
4708   case ISD::VECREDUCE_AND:
4709   case ISD::VP_REDUCE_AND: {
4710     // vcpop ~x == 0
4711     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4712     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
4713     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4714     CC = ISD::SETEQ;
4715     BaseOpc = ISD::AND;
4716     break;
4717   }
4718   case ISD::VECREDUCE_OR:
4719   case ISD::VP_REDUCE_OR:
4720     // vcpop x != 0
4721     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4722     CC = ISD::SETNE;
4723     BaseOpc = ISD::OR;
4724     break;
4725   case ISD::VECREDUCE_XOR:
4726   case ISD::VP_REDUCE_XOR: {
4727     // ((vcpop x) & 1) != 0
4728     SDValue One = DAG.getConstant(1, DL, XLenVT);
4729     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
4730     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
4731     CC = ISD::SETNE;
4732     BaseOpc = ISD::XOR;
4733     break;
4734   }
4735   }
4736 
4737   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
4738 
4739   if (!IsVP)
4740     return SetCC;
4741 
4742   // Now include the start value in the operation.
4743   // Note that we must return the start value when no elements are operated
4744   // upon. The vcpop instructions we've emitted in each case above will return
4745   // 0 for an inactive vector, and so we've already received the neutral value:
4746   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
4747   // can simply include the start value.
4748   return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
4749 }
4750 
4751 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
4752                                             SelectionDAG &DAG) const {
4753   SDLoc DL(Op);
4754   SDValue Vec = Op.getOperand(0);
4755   EVT VecEVT = Vec.getValueType();
4756 
4757   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
4758 
4759   // Due to ordering in legalize types we may have a vector type that needs to
4760   // be split. Do that manually so we can get down to a legal type.
4761   while (getTypeAction(*DAG.getContext(), VecEVT) ==
4762          TargetLowering::TypeSplitVector) {
4763     SDValue Lo, Hi;
4764     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
4765     VecEVT = Lo.getValueType();
4766     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
4767   }
4768 
4769   // TODO: The type may need to be widened rather than split. Or widened before
4770   // it can be split.
4771   if (!isTypeLegal(VecEVT))
4772     return SDValue();
4773 
4774   MVT VecVT = VecEVT.getSimpleVT();
4775   MVT VecEltVT = VecVT.getVectorElementType();
4776   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
4777 
4778   MVT ContainerVT = VecVT;
4779   if (VecVT.isFixedLengthVector()) {
4780     ContainerVT = getContainerForFixedLengthVector(VecVT);
4781     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4782   }
4783 
4784   MVT M1VT = getLMUL1VT(ContainerVT);
4785   MVT XLenVT = Subtarget.getXLenVT();
4786 
4787   SDValue Mask, VL;
4788   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4789 
4790   SDValue NeutralElem =
4791       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
4792   SDValue IdentitySplat = lowerScalarSplat(
4793       NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
4794   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
4795                                   IdentitySplat, Mask, VL);
4796   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
4797                              DAG.getConstant(0, DL, XLenVT));
4798   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
4799 }
4800 
4801 // Given a reduction op, this function returns the matching reduction opcode,
4802 // the vector SDValue and the scalar SDValue required to lower this to a
4803 // RISCVISD node.
4804 static std::tuple<unsigned, SDValue, SDValue>
4805 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
4806   SDLoc DL(Op);
4807   auto Flags = Op->getFlags();
4808   unsigned Opcode = Op.getOpcode();
4809   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
4810   switch (Opcode) {
4811   default:
4812     llvm_unreachable("Unhandled reduction");
4813   case ISD::VECREDUCE_FADD: {
4814     // Use positive zero if we can. It is cheaper to materialize.
4815     SDValue Zero =
4816         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
4817     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
4818   }
4819   case ISD::VECREDUCE_SEQ_FADD:
4820     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
4821                            Op.getOperand(0));
4822   case ISD::VECREDUCE_FMIN:
4823     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
4824                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4825   case ISD::VECREDUCE_FMAX:
4826     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
4827                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4828   }
4829 }
4830 
4831 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
4832                                               SelectionDAG &DAG) const {
4833   SDLoc DL(Op);
4834   MVT VecEltVT = Op.getSimpleValueType();
4835 
4836   unsigned RVVOpcode;
4837   SDValue VectorVal, ScalarVal;
4838   std::tie(RVVOpcode, VectorVal, ScalarVal) =
4839       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
4840   MVT VecVT = VectorVal.getSimpleValueType();
4841 
4842   MVT ContainerVT = VecVT;
4843   if (VecVT.isFixedLengthVector()) {
4844     ContainerVT = getContainerForFixedLengthVector(VecVT);
4845     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
4846   }
4847 
4848   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
4849   MVT XLenVT = Subtarget.getXLenVT();
4850 
4851   SDValue Mask, VL;
4852   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4853 
4854   SDValue ScalarSplat = lowerScalarSplat(
4855       ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
4856   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
4857                                   VectorVal, ScalarSplat, Mask, VL);
4858   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
4859                      DAG.getConstant(0, DL, XLenVT));
4860 }
4861 
4862 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
4863   switch (ISDOpcode) {
4864   default:
4865     llvm_unreachable("Unhandled reduction");
4866   case ISD::VP_REDUCE_ADD:
4867     return RISCVISD::VECREDUCE_ADD_VL;
4868   case ISD::VP_REDUCE_UMAX:
4869     return RISCVISD::VECREDUCE_UMAX_VL;
4870   case ISD::VP_REDUCE_SMAX:
4871     return RISCVISD::VECREDUCE_SMAX_VL;
4872   case ISD::VP_REDUCE_UMIN:
4873     return RISCVISD::VECREDUCE_UMIN_VL;
4874   case ISD::VP_REDUCE_SMIN:
4875     return RISCVISD::VECREDUCE_SMIN_VL;
4876   case ISD::VP_REDUCE_AND:
4877     return RISCVISD::VECREDUCE_AND_VL;
4878   case ISD::VP_REDUCE_OR:
4879     return RISCVISD::VECREDUCE_OR_VL;
4880   case ISD::VP_REDUCE_XOR:
4881     return RISCVISD::VECREDUCE_XOR_VL;
4882   case ISD::VP_REDUCE_FADD:
4883     return RISCVISD::VECREDUCE_FADD_VL;
4884   case ISD::VP_REDUCE_SEQ_FADD:
4885     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
4886   case ISD::VP_REDUCE_FMAX:
4887     return RISCVISD::VECREDUCE_FMAX_VL;
4888   case ISD::VP_REDUCE_FMIN:
4889     return RISCVISD::VECREDUCE_FMIN_VL;
4890   }
4891 }
4892 
4893 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
4894                                            SelectionDAG &DAG) const {
4895   SDLoc DL(Op);
4896   SDValue Vec = Op.getOperand(1);
4897   EVT VecEVT = Vec.getValueType();
4898 
4899   // TODO: The type may need to be widened rather than split. Or widened before
4900   // it can be split.
4901   if (!isTypeLegal(VecEVT))
4902     return SDValue();
4903 
4904   MVT VecVT = VecEVT.getSimpleVT();
4905   MVT VecEltVT = VecVT.getVectorElementType();
4906   unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
4907 
4908   MVT ContainerVT = VecVT;
4909   if (VecVT.isFixedLengthVector()) {
4910     ContainerVT = getContainerForFixedLengthVector(VecVT);
4911     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4912   }
4913 
4914   SDValue VL = Op.getOperand(3);
4915   SDValue Mask = Op.getOperand(2);
4916 
4917   MVT M1VT = getLMUL1VT(ContainerVT);
4918   MVT XLenVT = Subtarget.getXLenVT();
4919   MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
4920 
4921   SDValue StartSplat =
4922       lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
4923                        DL, DAG, Subtarget);
4924   SDValue Reduction =
4925       DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
4926   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
4927                              DAG.getConstant(0, DL, XLenVT));
4928   if (!VecVT.isInteger())
4929     return Elt0;
4930   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
4931 }
4932 
4933 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
4934                                                    SelectionDAG &DAG) const {
4935   SDValue Vec = Op.getOperand(0);
4936   SDValue SubVec = Op.getOperand(1);
4937   MVT VecVT = Vec.getSimpleValueType();
4938   MVT SubVecVT = SubVec.getSimpleValueType();
4939 
4940   SDLoc DL(Op);
4941   MVT XLenVT = Subtarget.getXLenVT();
4942   unsigned OrigIdx = Op.getConstantOperandVal(2);
4943   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4944 
4945   // We don't have the ability to slide mask vectors up indexed by their i1
4946   // elements; the smallest we can do is i8. Often we are able to bitcast to
4947   // equivalent i8 vectors. Note that when inserting a fixed-length vector
4948   // into a scalable one, we might not necessarily have enough scalable
4949   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
4950   if (SubVecVT.getVectorElementType() == MVT::i1 &&
4951       (OrigIdx != 0 || !Vec.isUndef())) {
4952     if (VecVT.getVectorMinNumElements() >= 8 &&
4953         SubVecVT.getVectorMinNumElements() >= 8) {
4954       assert(OrigIdx % 8 == 0 && "Invalid index");
4955       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
4956              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
4957              "Unexpected mask vector lowering");
4958       OrigIdx /= 8;
4959       SubVecVT =
4960           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
4961                            SubVecVT.isScalableVector());
4962       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
4963                                VecVT.isScalableVector());
4964       Vec = DAG.getBitcast(VecVT, Vec);
4965       SubVec = DAG.getBitcast(SubVecVT, SubVec);
4966     } else {
4967       // We can't slide this mask vector up indexed by its i1 elements.
4968       // This poses a problem when we wish to insert a scalable vector which
4969       // can't be re-expressed as a larger type. Just choose the slow path and
4970       // extend to a larger type, then truncate back down.
4971       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
4972       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
4973       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
4974       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
4975       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
4976                         Op.getOperand(2));
4977       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
4978       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
4979     }
4980   }
4981 
4982   // If the subvector vector is a fixed-length type, we cannot use subregister
4983   // manipulation to simplify the codegen; we don't know which register of a
4984   // LMUL group contains the specific subvector as we only know the minimum
4985   // register size. Therefore we must slide the vector group up the full
4986   // amount.
4987   if (SubVecVT.isFixedLengthVector()) {
4988     if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
4989       return Op;
4990     MVT ContainerVT = VecVT;
4991     if (VecVT.isFixedLengthVector()) {
4992       ContainerVT = getContainerForFixedLengthVector(VecVT);
4993       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4994     }
4995     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
4996                          DAG.getUNDEF(ContainerVT), SubVec,
4997                          DAG.getConstant(0, DL, XLenVT));
4998     if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
4999       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
5000       return DAG.getBitcast(Op.getValueType(), SubVec);
5001     }
5002     SDValue Mask =
5003         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5004     // Set the vector length to only the number of elements we care about. Note
5005     // that for slideup this includes the offset.
5006     SDValue VL =
5007         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
5008     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5009     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
5010                                   SubVec, SlideupAmt, Mask, VL);
5011     if (VecVT.isFixedLengthVector())
5012       Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
5013     return DAG.getBitcast(Op.getValueType(), Slideup);
5014   }
5015 
5016   unsigned SubRegIdx, RemIdx;
5017   std::tie(SubRegIdx, RemIdx) =
5018       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5019           VecVT, SubVecVT, OrigIdx, TRI);
5020 
5021   RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
5022   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
5023                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
5024                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
5025 
5026   // 1. If the Idx has been completely eliminated and this subvector's size is
5027   // a vector register or a multiple thereof, or the surrounding elements are
5028   // undef, then this is a subvector insert which naturally aligns to a vector
5029   // register. These can easily be handled using subregister manipulation.
5030   // 2. If the subvector is smaller than a vector register, then the insertion
5031   // must preserve the undisturbed elements of the register. We do this by
5032   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
5033   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
5034   // subvector within the vector register, and an INSERT_SUBVECTOR of that
5035   // LMUL=1 type back into the larger vector (resolving to another subregister
5036   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
5037   // to avoid allocating a large register group to hold our subvector.
5038   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
5039     return Op;
5040 
5041   // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
5042   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
5043   // (in our case undisturbed). This means we can set up a subvector insertion
5044   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
5045   // size of the subvector.
5046   MVT InterSubVT = VecVT;
5047   SDValue AlignedExtract = Vec;
5048   unsigned AlignedIdx = OrigIdx - RemIdx;
5049   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5050     InterSubVT = getLMUL1VT(VecVT);
5051     // Extract a subvector equal to the nearest full vector register type. This
5052     // should resolve to a EXTRACT_SUBREG instruction.
5053     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5054                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
5055   }
5056 
5057   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5058   // For scalable vectors this must be further multiplied by vscale.
5059   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
5060 
5061   SDValue Mask, VL;
5062   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5063 
5064   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
5065   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
5066   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
5067   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
5068 
5069   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
5070                        DAG.getUNDEF(InterSubVT), SubVec,
5071                        DAG.getConstant(0, DL, XLenVT));
5072 
5073   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
5074                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
5075 
5076   // If required, insert this subvector back into the correct vector register.
5077   // This should resolve to an INSERT_SUBREG instruction.
5078   if (VecVT.bitsGT(InterSubVT))
5079     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
5080                           DAG.getConstant(AlignedIdx, DL, XLenVT));
5081 
5082   // We might have bitcast from a mask type: cast back to the original type if
5083   // required.
5084   return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
5085 }
5086 
5087 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
5088                                                     SelectionDAG &DAG) const {
5089   SDValue Vec = Op.getOperand(0);
5090   MVT SubVecVT = Op.getSimpleValueType();
5091   MVT VecVT = Vec.getSimpleValueType();
5092 
5093   SDLoc DL(Op);
5094   MVT XLenVT = Subtarget.getXLenVT();
5095   unsigned OrigIdx = Op.getConstantOperandVal(1);
5096   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5097 
5098   // We don't have the ability to slide mask vectors down indexed by their i1
5099   // elements; the smallest we can do is i8. Often we are able to bitcast to
5100   // equivalent i8 vectors. Note that when extracting a fixed-length vector
5101   // from a scalable one, we might not necessarily have enough scalable
5102   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
5103   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
5104     if (VecVT.getVectorMinNumElements() >= 8 &&
5105         SubVecVT.getVectorMinNumElements() >= 8) {
5106       assert(OrigIdx % 8 == 0 && "Invalid index");
5107       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
5108              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
5109              "Unexpected mask vector lowering");
5110       OrigIdx /= 8;
5111       SubVecVT =
5112           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
5113                            SubVecVT.isScalableVector());
5114       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
5115                                VecVT.isScalableVector());
5116       Vec = DAG.getBitcast(VecVT, Vec);
5117     } else {
5118       // We can't slide this mask vector down, indexed by its i1 elements.
5119       // This poses a problem when we wish to extract a scalable vector which
5120       // can't be re-expressed as a larger type. Just choose the slow path and
5121       // extend to a larger type, then truncate back down.
5122       // TODO: We could probably improve this when extracting certain fixed
5123       // from fixed, where we can extract as i8 and shift the correct element
5124       // right to reach the desired subvector?
5125       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
5126       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
5127       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
5128       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
5129                         Op.getOperand(1));
5130       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
5131       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
5132     }
5133   }
5134 
5135   // If the subvector vector is a fixed-length type, we cannot use subregister
5136   // manipulation to simplify the codegen; we don't know which register of a
5137   // LMUL group contains the specific subvector as we only know the minimum
5138   // register size. Therefore we must slide the vector group down the full
5139   // amount.
5140   if (SubVecVT.isFixedLengthVector()) {
5141     // With an index of 0 this is a cast-like subvector, which can be performed
5142     // with subregister operations.
5143     if (OrigIdx == 0)
5144       return Op;
5145     MVT ContainerVT = VecVT;
5146     if (VecVT.isFixedLengthVector()) {
5147       ContainerVT = getContainerForFixedLengthVector(VecVT);
5148       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5149     }
5150     SDValue Mask =
5151         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5152     // Set the vector length to only the number of elements we care about. This
5153     // avoids sliding down elements we're going to discard straight away.
5154     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
5155     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5156     SDValue Slidedown =
5157         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
5158                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
5159     // Now we can use a cast-like subvector extract to get the result.
5160     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5161                             DAG.getConstant(0, DL, XLenVT));
5162     return DAG.getBitcast(Op.getValueType(), Slidedown);
5163   }
5164 
5165   unsigned SubRegIdx, RemIdx;
5166   std::tie(SubRegIdx, RemIdx) =
5167       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5168           VecVT, SubVecVT, OrigIdx, TRI);
5169 
5170   // If the Idx has been completely eliminated then this is a subvector extract
5171   // which naturally aligns to a vector register. These can easily be handled
5172   // using subregister manipulation.
5173   if (RemIdx == 0)
5174     return Op;
5175 
5176   // Else we must shift our vector register directly to extract the subvector.
5177   // Do this using VSLIDEDOWN.
5178 
5179   // If the vector type is an LMUL-group type, extract a subvector equal to the
5180   // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
5181   // instruction.
5182   MVT InterSubVT = VecVT;
5183   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5184     InterSubVT = getLMUL1VT(VecVT);
5185     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5186                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
5187   }
5188 
5189   // Slide this vector register down by the desired number of elements in order
5190   // to place the desired subvector starting at element 0.
5191   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5192   // For scalable vectors this must be further multiplied by vscale.
5193   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
5194 
5195   SDValue Mask, VL;
5196   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
5197   SDValue Slidedown =
5198       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
5199                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
5200 
5201   // Now the vector is in the right position, extract our final subvector. This
5202   // should resolve to a COPY.
5203   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5204                           DAG.getConstant(0, DL, XLenVT));
5205 
5206   // We might have bitcast from a mask type: cast back to the original type if
5207   // required.
5208   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
5209 }
5210 
5211 // Lower step_vector to the vid instruction. Any non-identity step value must
5212 // be accounted for my manual expansion.
5213 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
5214                                               SelectionDAG &DAG) const {
5215   SDLoc DL(Op);
5216   MVT VT = Op.getSimpleValueType();
5217   MVT XLenVT = Subtarget.getXLenVT();
5218   SDValue Mask, VL;
5219   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
5220   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5221   uint64_t StepValImm = Op.getConstantOperandVal(0);
5222   if (StepValImm != 1) {
5223     if (isPowerOf2_64(StepValImm)) {
5224       SDValue StepVal =
5225           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
5226                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
5227       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
5228     } else {
5229       SDValue StepVal = lowerScalarSplat(
5230           DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
5231           DL, DAG, Subtarget);
5232       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
5233     }
5234   }
5235   return StepVec;
5236 }
5237 
5238 // Implement vector_reverse using vrgather.vv with indices determined by
5239 // subtracting the id of each element from (VLMAX-1). This will convert
5240 // the indices like so:
5241 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
5242 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
5243 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
5244                                                  SelectionDAG &DAG) const {
5245   SDLoc DL(Op);
5246   MVT VecVT = Op.getSimpleValueType();
5247   unsigned EltSize = VecVT.getScalarSizeInBits();
5248   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
5249 
5250   unsigned MaxVLMAX = 0;
5251   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
5252   if (VectorBitsMax != 0)
5253     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
5254 
5255   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
5256   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
5257 
5258   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
5259   // to use vrgatherei16.vv.
5260   // TODO: It's also possible to use vrgatherei16.vv for other types to
5261   // decrease register width for the index calculation.
5262   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
5263     // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
5264     // Reverse each half, then reassemble them in reverse order.
5265     // NOTE: It's also possible that after splitting that VLMAX no longer
5266     // requires vrgatherei16.vv.
5267     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
5268       SDValue Lo, Hi;
5269       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
5270       EVT LoVT, HiVT;
5271       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
5272       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
5273       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
5274       // Reassemble the low and high pieces reversed.
5275       // FIXME: This is a CONCAT_VECTORS.
5276       SDValue Res =
5277           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
5278                       DAG.getIntPtrConstant(0, DL));
5279       return DAG.getNode(
5280           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
5281           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
5282     }
5283 
5284     // Just promote the int type to i16 which will double the LMUL.
5285     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
5286     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
5287   }
5288 
5289   MVT XLenVT = Subtarget.getXLenVT();
5290   SDValue Mask, VL;
5291   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5292 
5293   // Calculate VLMAX-1 for the desired SEW.
5294   unsigned MinElts = VecVT.getVectorMinNumElements();
5295   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
5296                               DAG.getConstant(MinElts, DL, XLenVT));
5297   SDValue VLMinus1 =
5298       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
5299 
5300   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
5301   bool IsRV32E64 =
5302       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
5303   SDValue SplatVL;
5304   if (!IsRV32E64)
5305     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
5306   else
5307     SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
5308 
5309   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
5310   SDValue Indices =
5311       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
5312 
5313   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
5314 }
5315 
5316 SDValue
5317 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
5318                                                      SelectionDAG &DAG) const {
5319   SDLoc DL(Op);
5320   auto *Load = cast<LoadSDNode>(Op);
5321 
5322   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5323                                         Load->getMemoryVT(),
5324                                         *Load->getMemOperand()) &&
5325          "Expecting a correctly-aligned load");
5326 
5327   MVT VT = Op.getSimpleValueType();
5328   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5329 
5330   SDValue VL =
5331       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5332 
5333   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5334   SDValue NewLoad = DAG.getMemIntrinsicNode(
5335       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
5336       Load->getMemoryVT(), Load->getMemOperand());
5337 
5338   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5339   return DAG.getMergeValues({Result, Load->getChain()}, DL);
5340 }
5341 
5342 SDValue
5343 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
5344                                                       SelectionDAG &DAG) const {
5345   SDLoc DL(Op);
5346   auto *Store = cast<StoreSDNode>(Op);
5347 
5348   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5349                                         Store->getMemoryVT(),
5350                                         *Store->getMemOperand()) &&
5351          "Expecting a correctly-aligned store");
5352 
5353   SDValue StoreVal = Store->getValue();
5354   MVT VT = StoreVal.getSimpleValueType();
5355 
5356   // If the size less than a byte, we need to pad with zeros to make a byte.
5357   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
5358     VT = MVT::v8i1;
5359     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
5360                            DAG.getConstant(0, DL, VT), StoreVal,
5361                            DAG.getIntPtrConstant(0, DL));
5362   }
5363 
5364   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5365 
5366   SDValue VL =
5367       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5368 
5369   SDValue NewValue =
5370       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
5371   return DAG.getMemIntrinsicNode(
5372       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
5373       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
5374       Store->getMemoryVT(), Store->getMemOperand());
5375 }
5376 
5377 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
5378                                              SelectionDAG &DAG) const {
5379   SDLoc DL(Op);
5380   MVT VT = Op.getSimpleValueType();
5381 
5382   const auto *MemSD = cast<MemSDNode>(Op);
5383   EVT MemVT = MemSD->getMemoryVT();
5384   MachineMemOperand *MMO = MemSD->getMemOperand();
5385   SDValue Chain = MemSD->getChain();
5386   SDValue BasePtr = MemSD->getBasePtr();
5387 
5388   SDValue Mask, PassThru, VL;
5389   if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
5390     Mask = VPLoad->getMask();
5391     PassThru = DAG.getUNDEF(VT);
5392     VL = VPLoad->getVectorLength();
5393   } else {
5394     const auto *MLoad = cast<MaskedLoadSDNode>(Op);
5395     Mask = MLoad->getMask();
5396     PassThru = MLoad->getPassThru();
5397   }
5398 
5399   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5400 
5401   MVT XLenVT = Subtarget.getXLenVT();
5402 
5403   MVT ContainerVT = VT;
5404   if (VT.isFixedLengthVector()) {
5405     ContainerVT = getContainerForFixedLengthVector(VT);
5406     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5407     if (!IsUnmasked) {
5408       MVT MaskVT =
5409           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5410       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5411     }
5412   }
5413 
5414   if (!VL)
5415     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5416 
5417   unsigned IntID =
5418       IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
5419   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5420   if (!IsUnmasked)
5421     Ops.push_back(PassThru);
5422   Ops.push_back(BasePtr);
5423   if (!IsUnmasked)
5424     Ops.push_back(Mask);
5425   Ops.push_back(VL);
5426   if (!IsUnmasked)
5427     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
5428 
5429   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5430 
5431   SDValue Result =
5432       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
5433   Chain = Result.getValue(1);
5434 
5435   if (VT.isFixedLengthVector())
5436     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
5437 
5438   return DAG.getMergeValues({Result, Chain}, DL);
5439 }
5440 
5441 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
5442                                               SelectionDAG &DAG) const {
5443   SDLoc DL(Op);
5444 
5445   const auto *MemSD = cast<MemSDNode>(Op);
5446   EVT MemVT = MemSD->getMemoryVT();
5447   MachineMemOperand *MMO = MemSD->getMemOperand();
5448   SDValue Chain = MemSD->getChain();
5449   SDValue BasePtr = MemSD->getBasePtr();
5450   SDValue Val, Mask, VL;
5451 
5452   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
5453     Val = VPStore->getValue();
5454     Mask = VPStore->getMask();
5455     VL = VPStore->getVectorLength();
5456   } else {
5457     const auto *MStore = cast<MaskedStoreSDNode>(Op);
5458     Val = MStore->getValue();
5459     Mask = MStore->getMask();
5460   }
5461 
5462   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5463 
5464   MVT VT = Val.getSimpleValueType();
5465   MVT XLenVT = Subtarget.getXLenVT();
5466 
5467   MVT ContainerVT = VT;
5468   if (VT.isFixedLengthVector()) {
5469     ContainerVT = getContainerForFixedLengthVector(VT);
5470 
5471     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5472     if (!IsUnmasked) {
5473       MVT MaskVT =
5474           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5475       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5476     }
5477   }
5478 
5479   if (!VL)
5480     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5481 
5482   unsigned IntID =
5483       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
5484   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5485   Ops.push_back(Val);
5486   Ops.push_back(BasePtr);
5487   if (!IsUnmasked)
5488     Ops.push_back(Mask);
5489   Ops.push_back(VL);
5490 
5491   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
5492                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
5493 }
5494 
5495 SDValue
5496 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
5497                                                       SelectionDAG &DAG) const {
5498   MVT InVT = Op.getOperand(0).getSimpleValueType();
5499   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
5500 
5501   MVT VT = Op.getSimpleValueType();
5502 
5503   SDValue Op1 =
5504       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
5505   SDValue Op2 =
5506       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5507 
5508   SDLoc DL(Op);
5509   SDValue VL =
5510       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5511 
5512   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5513   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5514 
5515   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
5516                             Op.getOperand(2), Mask, VL);
5517 
5518   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
5519 }
5520 
5521 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
5522     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
5523   MVT VT = Op.getSimpleValueType();
5524 
5525   if (VT.getVectorElementType() == MVT::i1)
5526     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
5527 
5528   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
5529 }
5530 
5531 SDValue
5532 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
5533                                                       SelectionDAG &DAG) const {
5534   unsigned Opc;
5535   switch (Op.getOpcode()) {
5536   default: llvm_unreachable("Unexpected opcode!");
5537   case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
5538   case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
5539   case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
5540   }
5541 
5542   return lowerToScalableOp(Op, DAG, Opc);
5543 }
5544 
5545 // Lower vector ABS to smax(X, sub(0, X)).
5546 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
5547   SDLoc DL(Op);
5548   MVT VT = Op.getSimpleValueType();
5549   SDValue X = Op.getOperand(0);
5550 
5551   assert(VT.isFixedLengthVector() && "Unexpected type");
5552 
5553   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5554   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5555 
5556   SDValue Mask, VL;
5557   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5558 
5559   SDValue SplatZero =
5560       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
5561                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
5562   SDValue NegX =
5563       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
5564   SDValue Max =
5565       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
5566 
5567   return convertFromScalableVector(VT, Max, DAG, Subtarget);
5568 }
5569 
5570 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
5571     SDValue Op, SelectionDAG &DAG) const {
5572   SDLoc DL(Op);
5573   MVT VT = Op.getSimpleValueType();
5574   SDValue Mag = Op.getOperand(0);
5575   SDValue Sign = Op.getOperand(1);
5576   assert(Mag.getValueType() == Sign.getValueType() &&
5577          "Can only handle COPYSIGN with matching types.");
5578 
5579   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5580   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
5581   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
5582 
5583   SDValue Mask, VL;
5584   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5585 
5586   SDValue CopySign =
5587       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
5588 
5589   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
5590 }
5591 
5592 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
5593     SDValue Op, SelectionDAG &DAG) const {
5594   MVT VT = Op.getSimpleValueType();
5595   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5596 
5597   MVT I1ContainerVT =
5598       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5599 
5600   SDValue CC =
5601       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
5602   SDValue Op1 =
5603       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5604   SDValue Op2 =
5605       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
5606 
5607   SDLoc DL(Op);
5608   SDValue Mask, VL;
5609   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5610 
5611   SDValue Select =
5612       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
5613 
5614   return convertFromScalableVector(VT, Select, DAG, Subtarget);
5615 }
5616 
5617 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
5618                                                unsigned NewOpc,
5619                                                bool HasMask) const {
5620   MVT VT = Op.getSimpleValueType();
5621   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5622 
5623   // Create list of operands by converting existing ones to scalable types.
5624   SmallVector<SDValue, 6> Ops;
5625   for (const SDValue &V : Op->op_values()) {
5626     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5627 
5628     // Pass through non-vector operands.
5629     if (!V.getValueType().isVector()) {
5630       Ops.push_back(V);
5631       continue;
5632     }
5633 
5634     // "cast" fixed length vector to a scalable vector.
5635     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
5636            "Only fixed length vectors are supported!");
5637     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5638   }
5639 
5640   SDLoc DL(Op);
5641   SDValue Mask, VL;
5642   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5643   if (HasMask)
5644     Ops.push_back(Mask);
5645   Ops.push_back(VL);
5646 
5647   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
5648   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
5649 }
5650 
5651 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
5652 // * Operands of each node are assumed to be in the same order.
5653 // * The EVL operand is promoted from i32 to i64 on RV64.
5654 // * Fixed-length vectors are converted to their scalable-vector container
5655 //   types.
5656 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
5657                                        unsigned RISCVISDOpc) const {
5658   SDLoc DL(Op);
5659   MVT VT = Op.getSimpleValueType();
5660   SmallVector<SDValue, 4> Ops;
5661 
5662   for (const auto &OpIdx : enumerate(Op->ops())) {
5663     SDValue V = OpIdx.value();
5664     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5665     // Pass through operands which aren't fixed-length vectors.
5666     if (!V.getValueType().isFixedLengthVector()) {
5667       Ops.push_back(V);
5668       continue;
5669     }
5670     // "cast" fixed length vector to a scalable vector.
5671     MVT OpVT = V.getSimpleValueType();
5672     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
5673     assert(useRVVForFixedLengthVectorVT(OpVT) &&
5674            "Only fixed length vectors are supported!");
5675     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5676   }
5677 
5678   if (!VT.isFixedLengthVector())
5679     return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
5680 
5681   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5682 
5683   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
5684 
5685   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
5686 }
5687 
5688 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
5689                                             unsigned MaskOpc,
5690                                             unsigned VecOpc) const {
5691   MVT VT = Op.getSimpleValueType();
5692   if (VT.getVectorElementType() != MVT::i1)
5693     return lowerVPOp(Op, DAG, VecOpc);
5694 
5695   // It is safe to drop mask parameter as masked-off elements are undef.
5696   SDValue Op1 = Op->getOperand(0);
5697   SDValue Op2 = Op->getOperand(1);
5698   SDValue VL = Op->getOperand(3);
5699 
5700   MVT ContainerVT = VT;
5701   const bool IsFixed = VT.isFixedLengthVector();
5702   if (IsFixed) {
5703     ContainerVT = getContainerForFixedLengthVector(VT);
5704     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5705     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
5706   }
5707 
5708   SDLoc DL(Op);
5709   SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
5710   if (!IsFixed)
5711     return Val;
5712   return convertFromScalableVector(VT, Val, DAG, Subtarget);
5713 }
5714 
5715 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
5716 // matched to a RVV indexed load. The RVV indexed load instructions only
5717 // support the "unsigned unscaled" addressing mode; indices are implicitly
5718 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
5719 // signed or scaled indexing is extended to the XLEN value type and scaled
5720 // accordingly.
5721 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
5722                                                SelectionDAG &DAG) const {
5723   SDLoc DL(Op);
5724   MVT VT = Op.getSimpleValueType();
5725 
5726   const auto *MemSD = cast<MemSDNode>(Op.getNode());
5727   EVT MemVT = MemSD->getMemoryVT();
5728   MachineMemOperand *MMO = MemSD->getMemOperand();
5729   SDValue Chain = MemSD->getChain();
5730   SDValue BasePtr = MemSD->getBasePtr();
5731 
5732   ISD::LoadExtType LoadExtType;
5733   SDValue Index, Mask, PassThru, VL;
5734 
5735   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
5736     Index = VPGN->getIndex();
5737     Mask = VPGN->getMask();
5738     PassThru = DAG.getUNDEF(VT);
5739     VL = VPGN->getVectorLength();
5740     // VP doesn't support extending loads.
5741     LoadExtType = ISD::NON_EXTLOAD;
5742   } else {
5743     // Else it must be a MGATHER.
5744     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
5745     Index = MGN->getIndex();
5746     Mask = MGN->getMask();
5747     PassThru = MGN->getPassThru();
5748     LoadExtType = MGN->getExtensionType();
5749   }
5750 
5751   MVT IndexVT = Index.getSimpleValueType();
5752   MVT XLenVT = Subtarget.getXLenVT();
5753 
5754   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
5755          "Unexpected VTs!");
5756   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
5757   // Targets have to explicitly opt-in for extending vector loads.
5758   assert(LoadExtType == ISD::NON_EXTLOAD &&
5759          "Unexpected extending MGATHER/VP_GATHER");
5760   (void)LoadExtType;
5761 
5762   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5763   // the selection of the masked intrinsics doesn't do this for us.
5764   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5765 
5766   MVT ContainerVT = VT;
5767   if (VT.isFixedLengthVector()) {
5768     // We need to use the larger of the result and index type to determine the
5769     // scalable type to use so we don't increase LMUL for any operand/result.
5770     if (VT.bitsGE(IndexVT)) {
5771       ContainerVT = getContainerForFixedLengthVector(VT);
5772       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
5773                                  ContainerVT.getVectorElementCount());
5774     } else {
5775       IndexVT = getContainerForFixedLengthVector(IndexVT);
5776       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
5777                                      IndexVT.getVectorElementCount());
5778     }
5779 
5780     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
5781 
5782     if (!IsUnmasked) {
5783       MVT MaskVT =
5784           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5785       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5786       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5787     }
5788   }
5789 
5790   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
5791       IndexVT = IndexVT.changeVectorElementType(XLenVT);
5792       Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
5793   }
5794 
5795   if (!VL)
5796     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5797 
5798   unsigned IntID =
5799       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
5800   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5801   if (!IsUnmasked)
5802     Ops.push_back(PassThru);
5803   Ops.push_back(BasePtr);
5804   Ops.push_back(Index);
5805   if (!IsUnmasked)
5806     Ops.push_back(Mask);
5807   Ops.push_back(VL);
5808   if (!IsUnmasked)
5809     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
5810 
5811   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5812   SDValue Result =
5813       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
5814   Chain = Result.getValue(1);
5815 
5816   if (VT.isFixedLengthVector())
5817     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
5818 
5819   return DAG.getMergeValues({Result, Chain}, DL);
5820 }
5821 
5822 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
5823 // matched to a RVV indexed store. The RVV indexed store instructions only
5824 // support the "unsigned unscaled" addressing mode; indices are implicitly
5825 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
5826 // signed or scaled indexing is extended to the XLEN value type and scaled
5827 // accordingly.
5828 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
5829                                                 SelectionDAG &DAG) const {
5830   SDLoc DL(Op);
5831   const auto *MemSD = cast<MemSDNode>(Op.getNode());
5832   EVT MemVT = MemSD->getMemoryVT();
5833   MachineMemOperand *MMO = MemSD->getMemOperand();
5834   SDValue Chain = MemSD->getChain();
5835   SDValue BasePtr = MemSD->getBasePtr();
5836 
5837   bool IsTruncatingStore = false;
5838   SDValue Index, Mask, Val, VL;
5839 
5840   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
5841     Index = VPSN->getIndex();
5842     Mask = VPSN->getMask();
5843     Val = VPSN->getValue();
5844     VL = VPSN->getVectorLength();
5845     // VP doesn't support truncating stores.
5846     IsTruncatingStore = false;
5847   } else {
5848     // Else it must be a MSCATTER.
5849     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
5850     Index = MSN->getIndex();
5851     Mask = MSN->getMask();
5852     Val = MSN->getValue();
5853     IsTruncatingStore = MSN->isTruncatingStore();
5854   }
5855 
5856   MVT VT = Val.getSimpleValueType();
5857   MVT IndexVT = Index.getSimpleValueType();
5858   MVT XLenVT = Subtarget.getXLenVT();
5859 
5860   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
5861          "Unexpected VTs!");
5862   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
5863   // Targets have to explicitly opt-in for extending vector loads and
5864   // truncating vector stores.
5865   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
5866   (void)IsTruncatingStore;
5867 
5868   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5869   // the selection of the masked intrinsics doesn't do this for us.
5870   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5871 
5872   MVT ContainerVT = VT;
5873   if (VT.isFixedLengthVector()) {
5874     // We need to use the larger of the value and index type to determine the
5875     // scalable type to use so we don't increase LMUL for any operand/result.
5876     if (VT.bitsGE(IndexVT)) {
5877       ContainerVT = getContainerForFixedLengthVector(VT);
5878       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
5879                                  ContainerVT.getVectorElementCount());
5880     } else {
5881       IndexVT = getContainerForFixedLengthVector(IndexVT);
5882       ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
5883                                      IndexVT.getVectorElementCount());
5884     }
5885 
5886     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
5887     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5888 
5889     if (!IsUnmasked) {
5890       MVT MaskVT =
5891           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5892       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5893     }
5894   }
5895 
5896   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
5897       IndexVT = IndexVT.changeVectorElementType(XLenVT);
5898       Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
5899   }
5900 
5901   if (!VL)
5902     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5903 
5904   unsigned IntID =
5905       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
5906   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5907   Ops.push_back(Val);
5908   Ops.push_back(BasePtr);
5909   Ops.push_back(Index);
5910   if (!IsUnmasked)
5911     Ops.push_back(Mask);
5912   Ops.push_back(VL);
5913 
5914   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
5915                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
5916 }
5917 
5918 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
5919                                                SelectionDAG &DAG) const {
5920   const MVT XLenVT = Subtarget.getXLenVT();
5921   SDLoc DL(Op);
5922   SDValue Chain = Op->getOperand(0);
5923   SDValue SysRegNo = DAG.getTargetConstant(
5924       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5925   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
5926   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
5927 
5928   // Encoding used for rounding mode in RISCV differs from that used in
5929   // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
5930   // table, which consists of a sequence of 4-bit fields, each representing
5931   // corresponding FLT_ROUNDS mode.
5932   static const int Table =
5933       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
5934       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
5935       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
5936       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
5937       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
5938 
5939   SDValue Shift =
5940       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
5941   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5942                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5943   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5944                                DAG.getConstant(7, DL, XLenVT));
5945 
5946   return DAG.getMergeValues({Masked, Chain}, DL);
5947 }
5948 
5949 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
5950                                                SelectionDAG &DAG) const {
5951   const MVT XLenVT = Subtarget.getXLenVT();
5952   SDLoc DL(Op);
5953   SDValue Chain = Op->getOperand(0);
5954   SDValue RMValue = Op->getOperand(1);
5955   SDValue SysRegNo = DAG.getTargetConstant(
5956       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5957 
5958   // Encoding used for rounding mode in RISCV differs from that used in
5959   // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
5960   // a table, which consists of a sequence of 4-bit fields, each representing
5961   // corresponding RISCV mode.
5962   static const unsigned Table =
5963       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
5964       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
5965       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
5966       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
5967       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
5968 
5969   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
5970                               DAG.getConstant(2, DL, XLenVT));
5971   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5972                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5973   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5974                         DAG.getConstant(0x7, DL, XLenVT));
5975   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
5976                      RMValue);
5977 }
5978 
5979 static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
5980   switch (IntNo) {
5981   default:
5982     llvm_unreachable("Unexpected Intrinsic");
5983   case Intrinsic::riscv_grev:
5984     return RISCVISD::GREVW;
5985   case Intrinsic::riscv_gorc:
5986     return RISCVISD::GORCW;
5987   case Intrinsic::riscv_bcompress:
5988     return RISCVISD::BCOMPRESSW;
5989   case Intrinsic::riscv_bdecompress:
5990     return RISCVISD::BDECOMPRESSW;
5991   case Intrinsic::riscv_bfp:
5992     return RISCVISD::BFPW;
5993   case Intrinsic::riscv_fsl:
5994     return RISCVISD::FSLW;
5995   case Intrinsic::riscv_fsr:
5996     return RISCVISD::FSRW;
5997   }
5998 }
5999 
6000 // Converts the given intrinsic to a i64 operation with any extension.
6001 static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
6002                                          unsigned IntNo) {
6003   SDLoc DL(N);
6004   RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
6005   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6006   SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6007   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
6008   // ReplaceNodeResults requires we maintain the same type for the return value.
6009   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6010 }
6011 
6012 // Returns the opcode of the target-specific SDNode that implements the 32-bit
6013 // form of the given Opcode.
6014 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
6015   switch (Opcode) {
6016   default:
6017     llvm_unreachable("Unexpected opcode");
6018   case ISD::SHL:
6019     return RISCVISD::SLLW;
6020   case ISD::SRA:
6021     return RISCVISD::SRAW;
6022   case ISD::SRL:
6023     return RISCVISD::SRLW;
6024   case ISD::SDIV:
6025     return RISCVISD::DIVW;
6026   case ISD::UDIV:
6027     return RISCVISD::DIVUW;
6028   case ISD::UREM:
6029     return RISCVISD::REMUW;
6030   case ISD::ROTL:
6031     return RISCVISD::ROLW;
6032   case ISD::ROTR:
6033     return RISCVISD::RORW;
6034   case RISCVISD::GREV:
6035     return RISCVISD::GREVW;
6036   case RISCVISD::GORC:
6037     return RISCVISD::GORCW;
6038   }
6039 }
6040 
6041 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
6042 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
6043 // otherwise be promoted to i64, making it difficult to select the
6044 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of
6045 // type i8/i16/i32 is lost.
6046 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
6047                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
6048   SDLoc DL(N);
6049   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6050   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
6051   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
6052   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6053   // ReplaceNodeResults requires we maintain the same type for the return value.
6054   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6055 }
6056 
6057 // Converts the given 32-bit operation to a i64 operation with signed extension
6058 // semantic to reduce the signed extension instructions.
6059 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
6060   SDLoc DL(N);
6061   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6062   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6063   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
6064   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6065                                DAG.getValueType(MVT::i32));
6066   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
6067 }
6068 
6069 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
6070                                              SmallVectorImpl<SDValue> &Results,
6071                                              SelectionDAG &DAG) const {
6072   SDLoc DL(N);
6073   switch (N->getOpcode()) {
6074   default:
6075     llvm_unreachable("Don't know how to custom type legalize this operation!");
6076   case ISD::STRICT_FP_TO_SINT:
6077   case ISD::STRICT_FP_TO_UINT:
6078   case ISD::FP_TO_SINT:
6079   case ISD::FP_TO_UINT: {
6080     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6081            "Unexpected custom legalisation");
6082     bool IsStrict = N->isStrictFPOpcode();
6083     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
6084                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
6085     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
6086     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
6087         TargetLowering::TypeSoftenFloat) {
6088       if (!isTypeLegal(Op0.getValueType()))
6089         return;
6090       if (IsStrict) {
6091         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
6092                                 : RISCVISD::STRICT_FCVT_WU_RV64;
6093         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
6094         SDValue Res = DAG.getNode(
6095             Opc, DL, VTs, N->getOperand(0), Op0,
6096             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6097         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6098         Results.push_back(Res.getValue(1));
6099         return;
6100       }
6101       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
6102       SDValue Res =
6103           DAG.getNode(Opc, DL, MVT::i64, Op0,
6104                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6105       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6106       return;
6107     }
6108     // If the FP type needs to be softened, emit a library call using the 'si'
6109     // version. If we left it to default legalization we'd end up with 'di'. If
6110     // the FP type doesn't need to be softened just let generic type
6111     // legalization promote the result type.
6112     RTLIB::Libcall LC;
6113     if (IsSigned)
6114       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
6115     else
6116       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
6117     MakeLibCallOptions CallOptions;
6118     EVT OpVT = Op0.getValueType();
6119     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
6120     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
6121     SDValue Result;
6122     std::tie(Result, Chain) =
6123         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
6124     Results.push_back(Result);
6125     if (IsStrict)
6126       Results.push_back(Chain);
6127     break;
6128   }
6129   case ISD::READCYCLECOUNTER: {
6130     assert(!Subtarget.is64Bit() &&
6131            "READCYCLECOUNTER only has custom type legalization on riscv32");
6132 
6133     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
6134     SDValue RCW =
6135         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
6136 
6137     Results.push_back(
6138         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
6139     Results.push_back(RCW.getValue(2));
6140     break;
6141   }
6142   case ISD::MUL: {
6143     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
6144     unsigned XLen = Subtarget.getXLen();
6145     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
6146     if (Size > XLen) {
6147       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
6148       SDValue LHS = N->getOperand(0);
6149       SDValue RHS = N->getOperand(1);
6150       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
6151 
6152       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
6153       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
6154       // We need exactly one side to be unsigned.
6155       if (LHSIsU == RHSIsU)
6156         return;
6157 
6158       auto MakeMULPair = [&](SDValue S, SDValue U) {
6159         MVT XLenVT = Subtarget.getXLenVT();
6160         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
6161         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
6162         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
6163         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
6164         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
6165       };
6166 
6167       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
6168       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
6169 
6170       // The other operand should be signed, but still prefer MULH when
6171       // possible.
6172       if (RHSIsU && LHSIsS && !RHSIsS)
6173         Results.push_back(MakeMULPair(LHS, RHS));
6174       else if (LHSIsU && RHSIsS && !LHSIsS)
6175         Results.push_back(MakeMULPair(RHS, LHS));
6176 
6177       return;
6178     }
6179     LLVM_FALLTHROUGH;
6180   }
6181   case ISD::ADD:
6182   case ISD::SUB:
6183     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6184            "Unexpected custom legalisation");
6185     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
6186     break;
6187   case ISD::SHL:
6188   case ISD::SRA:
6189   case ISD::SRL:
6190     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6191            "Unexpected custom legalisation");
6192     if (N->getOperand(1).getOpcode() != ISD::Constant) {
6193       Results.push_back(customLegalizeToWOp(N, DAG));
6194       break;
6195     }
6196 
6197     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
6198     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
6199     // shift amount.
6200     if (N->getOpcode() == ISD::SHL) {
6201       SDLoc DL(N);
6202       SDValue NewOp0 =
6203           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6204       SDValue NewOp1 =
6205           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
6206       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
6207       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6208                                    DAG.getValueType(MVT::i32));
6209       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6210     }
6211 
6212     break;
6213   case ISD::ROTL:
6214   case ISD::ROTR:
6215     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6216            "Unexpected custom legalisation");
6217     Results.push_back(customLegalizeToWOp(N, DAG));
6218     break;
6219   case ISD::CTTZ:
6220   case ISD::CTTZ_ZERO_UNDEF:
6221   case ISD::CTLZ:
6222   case ISD::CTLZ_ZERO_UNDEF: {
6223     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6224            "Unexpected custom legalisation");
6225 
6226     SDValue NewOp0 =
6227         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6228     bool IsCTZ =
6229         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
6230     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
6231     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
6232     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6233     return;
6234   }
6235   case ISD::SDIV:
6236   case ISD::UDIV:
6237   case ISD::UREM: {
6238     MVT VT = N->getSimpleValueType(0);
6239     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
6240            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
6241            "Unexpected custom legalisation");
6242     // Don't promote division/remainder by constant since we should expand those
6243     // to multiply by magic constant.
6244     // FIXME: What if the expansion is disabled for minsize.
6245     if (N->getOperand(1).getOpcode() == ISD::Constant)
6246       return;
6247 
6248     // If the input is i32, use ANY_EXTEND since the W instructions don't read
6249     // the upper 32 bits. For other types we need to sign or zero extend
6250     // based on the opcode.
6251     unsigned ExtOpc = ISD::ANY_EXTEND;
6252     if (VT != MVT::i32)
6253       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
6254                                            : ISD::ZERO_EXTEND;
6255 
6256     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
6257     break;
6258   }
6259   case ISD::UADDO:
6260   case ISD::USUBO: {
6261     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6262            "Unexpected custom legalisation");
6263     bool IsAdd = N->getOpcode() == ISD::UADDO;
6264     // Create an ADDW or SUBW.
6265     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6266     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6267     SDValue Res =
6268         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
6269     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
6270                       DAG.getValueType(MVT::i32));
6271 
6272     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
6273     // Since the inputs are sign extended from i32, this is equivalent to
6274     // comparing the lower 32 bits.
6275     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6276     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
6277                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
6278 
6279     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6280     Results.push_back(Overflow);
6281     return;
6282   }
6283   case ISD::UADDSAT:
6284   case ISD::USUBSAT: {
6285     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6286            "Unexpected custom legalisation");
6287     if (Subtarget.hasStdExtZbb()) {
6288       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
6289       // sign extend allows overflow of the lower 32 bits to be detected on
6290       // the promoted size.
6291       SDValue LHS =
6292           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6293       SDValue RHS =
6294           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
6295       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
6296       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6297       return;
6298     }
6299 
6300     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
6301     // promotion for UADDO/USUBO.
6302     Results.push_back(expandAddSubSat(N, DAG));
6303     return;
6304   }
6305   case ISD::BITCAST: {
6306     EVT VT = N->getValueType(0);
6307     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
6308     SDValue Op0 = N->getOperand(0);
6309     EVT Op0VT = Op0.getValueType();
6310     MVT XLenVT = Subtarget.getXLenVT();
6311     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
6312       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
6313       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
6314     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
6315                Subtarget.hasStdExtF()) {
6316       SDValue FPConv =
6317           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
6318       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
6319     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
6320                isTypeLegal(Op0VT)) {
6321       // Custom-legalize bitcasts from fixed-length vector types to illegal
6322       // scalar types in order to improve codegen. Bitcast the vector to a
6323       // one-element vector type whose element type is the same as the result
6324       // type, and extract the first element.
6325       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6326       if (isTypeLegal(BVT)) {
6327         SDValue BVec = DAG.getBitcast(BVT, Op0);
6328         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6329                                       DAG.getConstant(0, DL, XLenVT)));
6330       }
6331     }
6332     break;
6333   }
6334   case RISCVISD::GREV:
6335   case RISCVISD::GORC: {
6336     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6337            "Unexpected custom legalisation");
6338     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6339     // This is similar to customLegalizeToWOp, except that we pass the second
6340     // operand (a TargetConstant) straight through: it is already of type
6341     // XLenVT.
6342     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6343     SDValue NewOp0 =
6344         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6345     SDValue NewOp1 =
6346         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6347     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6348     // ReplaceNodeResults requires we maintain the same type for the return
6349     // value.
6350     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6351     break;
6352   }
6353   case RISCVISD::SHFL: {
6354     // There is no SHFLIW instruction, but we can just promote the operation.
6355     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6356            "Unexpected custom legalisation");
6357     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6358     SDValue NewOp0 =
6359         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6360     SDValue NewOp1 =
6361         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6362     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
6363     // ReplaceNodeResults requires we maintain the same type for the return
6364     // value.
6365     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6366     break;
6367   }
6368   case ISD::BSWAP:
6369   case ISD::BITREVERSE: {
6370     MVT VT = N->getSimpleValueType(0);
6371     MVT XLenVT = Subtarget.getXLenVT();
6372     assert((VT == MVT::i8 || VT == MVT::i16 ||
6373             (VT == MVT::i32 && Subtarget.is64Bit())) &&
6374            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
6375     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
6376     unsigned Imm = VT.getSizeInBits() - 1;
6377     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
6378     if (N->getOpcode() == ISD::BSWAP)
6379       Imm &= ~0x7U;
6380     unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
6381     SDValue GREVI =
6382         DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
6383     // ReplaceNodeResults requires we maintain the same type for the return
6384     // value.
6385     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
6386     break;
6387   }
6388   case ISD::FSHL:
6389   case ISD::FSHR: {
6390     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6391            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
6392     SDValue NewOp0 =
6393         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6394     SDValue NewOp1 =
6395         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6396     SDValue NewShAmt =
6397         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6398     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
6399     // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
6400     NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
6401                            DAG.getConstant(0x1f, DL, MVT::i64));
6402     // fshl and fshr concatenate their operands in the same order. fsrw and fslw
6403     // instruction use different orders. fshl will return its first operand for
6404     // shift of zero, fshr will return its second operand. fsl and fsr both
6405     // return rs1 so the ISD nodes need to have different operand orders.
6406     // Shift amount is in rs2.
6407     unsigned Opc = RISCVISD::FSLW;
6408     if (N->getOpcode() == ISD::FSHR) {
6409       std::swap(NewOp0, NewOp1);
6410       Opc = RISCVISD::FSRW;
6411     }
6412     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
6413     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
6414     break;
6415   }
6416   case ISD::EXTRACT_VECTOR_ELT: {
6417     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
6418     // type is illegal (currently only vXi64 RV32).
6419     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
6420     // transferred to the destination register. We issue two of these from the
6421     // upper- and lower- halves of the SEW-bit vector element, slid down to the
6422     // first element.
6423     SDValue Vec = N->getOperand(0);
6424     SDValue Idx = N->getOperand(1);
6425 
6426     // The vector type hasn't been legalized yet so we can't issue target
6427     // specific nodes if it needs legalization.
6428     // FIXME: We would manually legalize if it's important.
6429     if (!isTypeLegal(Vec.getValueType()))
6430       return;
6431 
6432     MVT VecVT = Vec.getSimpleValueType();
6433 
6434     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
6435            VecVT.getVectorElementType() == MVT::i64 &&
6436            "Unexpected EXTRACT_VECTOR_ELT legalization");
6437 
6438     // If this is a fixed vector, we need to convert it to a scalable vector.
6439     MVT ContainerVT = VecVT;
6440     if (VecVT.isFixedLengthVector()) {
6441       ContainerVT = getContainerForFixedLengthVector(VecVT);
6442       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6443     }
6444 
6445     MVT XLenVT = Subtarget.getXLenVT();
6446 
6447     // Use a VL of 1 to avoid processing more elements than we need.
6448     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6449     SDValue VL = DAG.getConstant(1, DL, XLenVT);
6450     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6451 
6452     // Unless the index is known to be 0, we must slide the vector down to get
6453     // the desired element into index 0.
6454     if (!isNullConstant(Idx)) {
6455       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
6456                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
6457     }
6458 
6459     // Extract the lower XLEN bits of the correct vector element.
6460     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6461 
6462     // To extract the upper XLEN bits of the vector element, shift the first
6463     // element right by 32 bits and re-extract the lower XLEN bits.
6464     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6465                                      DAG.getConstant(32, DL, XLenVT), VL);
6466     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
6467                                  ThirtyTwoV, Mask, VL);
6468 
6469     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6470 
6471     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6472     break;
6473   }
6474   case ISD::INTRINSIC_WO_CHAIN: {
6475     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6476     switch (IntNo) {
6477     default:
6478       llvm_unreachable(
6479           "Don't know how to custom type legalize this intrinsic!");
6480     case Intrinsic::riscv_grev:
6481     case Intrinsic::riscv_gorc:
6482     case Intrinsic::riscv_bcompress:
6483     case Intrinsic::riscv_bdecompress:
6484     case Intrinsic::riscv_bfp: {
6485       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6486              "Unexpected custom legalisation");
6487       Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
6488       break;
6489     }
6490     case Intrinsic::riscv_fsl:
6491     case Intrinsic::riscv_fsr: {
6492       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6493              "Unexpected custom legalisation");
6494       SDValue NewOp1 =
6495           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6496       SDValue NewOp2 =
6497           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6498       SDValue NewOp3 =
6499           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
6500       unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
6501       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
6502       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6503       break;
6504     }
6505     case Intrinsic::riscv_orc_b: {
6506       // Lower to the GORCI encoding for orc.b with the operand extended.
6507       SDValue NewOp =
6508           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6509       // If Zbp is enabled, use GORCIW which will sign extend the result.
6510       unsigned Opc =
6511           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
6512       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
6513                                 DAG.getConstant(7, DL, MVT::i64));
6514       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6515       return;
6516     }
6517     case Intrinsic::riscv_shfl:
6518     case Intrinsic::riscv_unshfl: {
6519       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6520              "Unexpected custom legalisation");
6521       SDValue NewOp1 =
6522           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6523       SDValue NewOp2 =
6524           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6525       unsigned Opc =
6526           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
6527       if (isa<ConstantSDNode>(N->getOperand(2))) {
6528         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
6529                              DAG.getConstant(0xf, DL, MVT::i64));
6530         Opc =
6531             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
6532       }
6533       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
6534       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6535       break;
6536     }
6537     case Intrinsic::riscv_vmv_x_s: {
6538       EVT VT = N->getValueType(0);
6539       MVT XLenVT = Subtarget.getXLenVT();
6540       if (VT.bitsLT(XLenVT)) {
6541         // Simple case just extract using vmv.x.s and truncate.
6542         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
6543                                       Subtarget.getXLenVT(), N->getOperand(1));
6544         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
6545         return;
6546       }
6547 
6548       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
6549              "Unexpected custom legalization");
6550 
6551       // We need to do the move in two steps.
6552       SDValue Vec = N->getOperand(1);
6553       MVT VecVT = Vec.getSimpleValueType();
6554 
6555       // First extract the lower XLEN bits of the element.
6556       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6557 
6558       // To extract the upper XLEN bits of the vector element, shift the first
6559       // element right by 32 bits and re-extract the lower XLEN bits.
6560       SDValue VL = DAG.getConstant(1, DL, XLenVT);
6561       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
6562       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6563       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
6564                                        DAG.getConstant(32, DL, XLenVT), VL);
6565       SDValue LShr32 =
6566           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
6567       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6568 
6569       Results.push_back(
6570           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6571       break;
6572     }
6573     }
6574     break;
6575   }
6576   case ISD::VECREDUCE_ADD:
6577   case ISD::VECREDUCE_AND:
6578   case ISD::VECREDUCE_OR:
6579   case ISD::VECREDUCE_XOR:
6580   case ISD::VECREDUCE_SMAX:
6581   case ISD::VECREDUCE_UMAX:
6582   case ISD::VECREDUCE_SMIN:
6583   case ISD::VECREDUCE_UMIN:
6584     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
6585       Results.push_back(V);
6586     break;
6587   case ISD::VP_REDUCE_ADD:
6588   case ISD::VP_REDUCE_AND:
6589   case ISD::VP_REDUCE_OR:
6590   case ISD::VP_REDUCE_XOR:
6591   case ISD::VP_REDUCE_SMAX:
6592   case ISD::VP_REDUCE_UMAX:
6593   case ISD::VP_REDUCE_SMIN:
6594   case ISD::VP_REDUCE_UMIN:
6595     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
6596       Results.push_back(V);
6597     break;
6598   case ISD::FLT_ROUNDS_: {
6599     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
6600     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
6601     Results.push_back(Res.getValue(0));
6602     Results.push_back(Res.getValue(1));
6603     break;
6604   }
6605   }
6606 }
6607 
6608 // A structure to hold one of the bit-manipulation patterns below. Together, a
6609 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
6610 //   (or (and (shl x, 1), 0xAAAAAAAA),
6611 //       (and (srl x, 1), 0x55555555))
6612 struct RISCVBitmanipPat {
6613   SDValue Op;
6614   unsigned ShAmt;
6615   bool IsSHL;
6616 
6617   bool formsPairWith(const RISCVBitmanipPat &Other) const {
6618     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
6619   }
6620 };
6621 
6622 // Matches patterns of the form
6623 //   (and (shl x, C2), (C1 << C2))
6624 //   (and (srl x, C2), C1)
6625 //   (shl (and x, C1), C2)
6626 //   (srl (and x, (C1 << C2)), C2)
6627 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
6628 // The expected masks for each shift amount are specified in BitmanipMasks where
6629 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
6630 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
6631 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
6632 // XLen is 64.
6633 static Optional<RISCVBitmanipPat>
6634 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
6635   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
6636          "Unexpected number of masks");
6637   Optional<uint64_t> Mask;
6638   // Optionally consume a mask around the shift operation.
6639   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
6640     Mask = Op.getConstantOperandVal(1);
6641     Op = Op.getOperand(0);
6642   }
6643   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
6644     return None;
6645   bool IsSHL = Op.getOpcode() == ISD::SHL;
6646 
6647   if (!isa<ConstantSDNode>(Op.getOperand(1)))
6648     return None;
6649   uint64_t ShAmt = Op.getConstantOperandVal(1);
6650 
6651   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6652   if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
6653     return None;
6654   // If we don't have enough masks for 64 bit, then we must be trying to
6655   // match SHFL so we're only allowed to shift 1/4 of the width.
6656   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
6657     return None;
6658 
6659   SDValue Src = Op.getOperand(0);
6660 
6661   // The expected mask is shifted left when the AND is found around SHL
6662   // patterns.
6663   //   ((x >> 1) & 0x55555555)
6664   //   ((x << 1) & 0xAAAAAAAA)
6665   bool SHLExpMask = IsSHL;
6666 
6667   if (!Mask) {
6668     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
6669     // the mask is all ones: consume that now.
6670     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
6671       Mask = Src.getConstantOperandVal(1);
6672       Src = Src.getOperand(0);
6673       // The expected mask is now in fact shifted left for SRL, so reverse the
6674       // decision.
6675       //   ((x & 0xAAAAAAAA) >> 1)
6676       //   ((x & 0x55555555) << 1)
6677       SHLExpMask = !SHLExpMask;
6678     } else {
6679       // Use a default shifted mask of all-ones if there's no AND, truncated
6680       // down to the expected width. This simplifies the logic later on.
6681       Mask = maskTrailingOnes<uint64_t>(Width);
6682       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
6683     }
6684   }
6685 
6686   unsigned MaskIdx = Log2_32(ShAmt);
6687   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6688 
6689   if (SHLExpMask)
6690     ExpMask <<= ShAmt;
6691 
6692   if (Mask != ExpMask)
6693     return None;
6694 
6695   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
6696 }
6697 
6698 // Matches any of the following bit-manipulation patterns:
6699 //   (and (shl x, 1), (0x55555555 << 1))
6700 //   (and (srl x, 1), 0x55555555)
6701 //   (shl (and x, 0x55555555), 1)
6702 //   (srl (and x, (0x55555555 << 1)), 1)
6703 // where the shift amount and mask may vary thus:
6704 //   [1]  = 0x55555555 / 0xAAAAAAAA
6705 //   [2]  = 0x33333333 / 0xCCCCCCCC
6706 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
6707 //   [8]  = 0x00FF00FF / 0xFF00FF00
6708 //   [16] = 0x0000FFFF / 0xFFFFFFFF
6709 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
6710 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
6711   // These are the unshifted masks which we use to match bit-manipulation
6712   // patterns. They may be shifted left in certain circumstances.
6713   static const uint64_t BitmanipMasks[] = {
6714       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
6715       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
6716 
6717   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6718 }
6719 
6720 // Match the following pattern as a GREVI(W) operation
6721 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
6722 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
6723                                const RISCVSubtarget &Subtarget) {
6724   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6725   EVT VT = Op.getValueType();
6726 
6727   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6728     auto LHS = matchGREVIPat(Op.getOperand(0));
6729     auto RHS = matchGREVIPat(Op.getOperand(1));
6730     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
6731       SDLoc DL(Op);
6732       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
6733                          DAG.getConstant(LHS->ShAmt, DL, VT));
6734     }
6735   }
6736   return SDValue();
6737 }
6738 
6739 // Matches any the following pattern as a GORCI(W) operation
6740 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
6741 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
6742 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
6743 // Note that with the variant of 3.,
6744 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
6745 // the inner pattern will first be matched as GREVI and then the outer
6746 // pattern will be matched to GORC via the first rule above.
6747 // 4.  (or (rotl/rotr x, bitwidth/2), x)
6748 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
6749                                const RISCVSubtarget &Subtarget) {
6750   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6751   EVT VT = Op.getValueType();
6752 
6753   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6754     SDLoc DL(Op);
6755     SDValue Op0 = Op.getOperand(0);
6756     SDValue Op1 = Op.getOperand(1);
6757 
6758     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
6759       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
6760           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
6761           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
6762         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
6763       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
6764       if ((Reverse.getOpcode() == ISD::ROTL ||
6765            Reverse.getOpcode() == ISD::ROTR) &&
6766           Reverse.getOperand(0) == X &&
6767           isa<ConstantSDNode>(Reverse.getOperand(1))) {
6768         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
6769         if (RotAmt == (VT.getSizeInBits() / 2))
6770           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
6771                              DAG.getConstant(RotAmt, DL, VT));
6772       }
6773       return SDValue();
6774     };
6775 
6776     // Check for either commutable permutation of (or (GREVI x, shamt), x)
6777     if (SDValue V = MatchOROfReverse(Op0, Op1))
6778       return V;
6779     if (SDValue V = MatchOROfReverse(Op1, Op0))
6780       return V;
6781 
6782     // OR is commutable so canonicalize its OR operand to the left
6783     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
6784       std::swap(Op0, Op1);
6785     if (Op0.getOpcode() != ISD::OR)
6786       return SDValue();
6787     SDValue OrOp0 = Op0.getOperand(0);
6788     SDValue OrOp1 = Op0.getOperand(1);
6789     auto LHS = matchGREVIPat(OrOp0);
6790     // OR is commutable so swap the operands and try again: x might have been
6791     // on the left
6792     if (!LHS) {
6793       std::swap(OrOp0, OrOp1);
6794       LHS = matchGREVIPat(OrOp0);
6795     }
6796     auto RHS = matchGREVIPat(Op1);
6797     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
6798       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
6799                          DAG.getConstant(LHS->ShAmt, DL, VT));
6800     }
6801   }
6802   return SDValue();
6803 }
6804 
6805 // Matches any of the following bit-manipulation patterns:
6806 //   (and (shl x, 1), (0x22222222 << 1))
6807 //   (and (srl x, 1), 0x22222222)
6808 //   (shl (and x, 0x22222222), 1)
6809 //   (srl (and x, (0x22222222 << 1)), 1)
6810 // where the shift amount and mask may vary thus:
6811 //   [1]  = 0x22222222 / 0x44444444
6812 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
6813 //   [4]  = 0x00F000F0 / 0x0F000F00
6814 //   [8]  = 0x0000FF00 / 0x00FF0000
6815 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
6816 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
6817   // These are the unshifted masks which we use to match bit-manipulation
6818   // patterns. They may be shifted left in certain circumstances.
6819   static const uint64_t BitmanipMasks[] = {
6820       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
6821       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
6822 
6823   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6824 }
6825 
6826 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
6827 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
6828                                const RISCVSubtarget &Subtarget) {
6829   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6830   EVT VT = Op.getValueType();
6831 
6832   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
6833     return SDValue();
6834 
6835   SDValue Op0 = Op.getOperand(0);
6836   SDValue Op1 = Op.getOperand(1);
6837 
6838   // Or is commutable so canonicalize the second OR to the LHS.
6839   if (Op0.getOpcode() != ISD::OR)
6840     std::swap(Op0, Op1);
6841   if (Op0.getOpcode() != ISD::OR)
6842     return SDValue();
6843 
6844   // We found an inner OR, so our operands are the operands of the inner OR
6845   // and the other operand of the outer OR.
6846   SDValue A = Op0.getOperand(0);
6847   SDValue B = Op0.getOperand(1);
6848   SDValue C = Op1;
6849 
6850   auto Match1 = matchSHFLPat(A);
6851   auto Match2 = matchSHFLPat(B);
6852 
6853   // If neither matched, we failed.
6854   if (!Match1 && !Match2)
6855     return SDValue();
6856 
6857   // We had at least one match. if one failed, try the remaining C operand.
6858   if (!Match1) {
6859     std::swap(A, C);
6860     Match1 = matchSHFLPat(A);
6861     if (!Match1)
6862       return SDValue();
6863   } else if (!Match2) {
6864     std::swap(B, C);
6865     Match2 = matchSHFLPat(B);
6866     if (!Match2)
6867       return SDValue();
6868   }
6869   assert(Match1 && Match2);
6870 
6871   // Make sure our matches pair up.
6872   if (!Match1->formsPairWith(*Match2))
6873     return SDValue();
6874 
6875   // All the remains is to make sure C is an AND with the same input, that masks
6876   // out the bits that are being shuffled.
6877   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
6878       C.getOperand(0) != Match1->Op)
6879     return SDValue();
6880 
6881   uint64_t Mask = C.getConstantOperandVal(1);
6882 
6883   static const uint64_t BitmanipMasks[] = {
6884       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
6885       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
6886   };
6887 
6888   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6889   unsigned MaskIdx = Log2_32(Match1->ShAmt);
6890   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6891 
6892   if (Mask != ExpMask)
6893     return SDValue();
6894 
6895   SDLoc DL(Op);
6896   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
6897                      DAG.getConstant(Match1->ShAmt, DL, VT));
6898 }
6899 
6900 // Optimize (add (shl x, c0), (shl y, c1)) ->
6901 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
6902 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
6903                                   const RISCVSubtarget &Subtarget) {
6904   // Perform this optimization only in the zba extension.
6905   if (!Subtarget.hasStdExtZba())
6906     return SDValue();
6907 
6908   // Skip for vector types and larger types.
6909   EVT VT = N->getValueType(0);
6910   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
6911     return SDValue();
6912 
6913   // The two operand nodes must be SHL and have no other use.
6914   SDValue N0 = N->getOperand(0);
6915   SDValue N1 = N->getOperand(1);
6916   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
6917       !N0->hasOneUse() || !N1->hasOneUse())
6918     return SDValue();
6919 
6920   // Check c0 and c1.
6921   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
6922   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
6923   if (!N0C || !N1C)
6924     return SDValue();
6925   int64_t C0 = N0C->getSExtValue();
6926   int64_t C1 = N1C->getSExtValue();
6927   if (C0 <= 0 || C1 <= 0)
6928     return SDValue();
6929 
6930   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
6931   int64_t Bits = std::min(C0, C1);
6932   int64_t Diff = std::abs(C0 - C1);
6933   if (Diff != 1 && Diff != 2 && Diff != 3)
6934     return SDValue();
6935 
6936   // Build nodes.
6937   SDLoc DL(N);
6938   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
6939   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
6940   SDValue NA0 =
6941       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
6942   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
6943   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
6944 }
6945 
6946 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
6947 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
6948 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
6949 // not undo itself, but they are redundant.
6950 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
6951   SDValue Src = N->getOperand(0);
6952 
6953   if (Src.getOpcode() != N->getOpcode())
6954     return SDValue();
6955 
6956   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
6957       !isa<ConstantSDNode>(Src.getOperand(1)))
6958     return SDValue();
6959 
6960   unsigned ShAmt1 = N->getConstantOperandVal(1);
6961   unsigned ShAmt2 = Src.getConstantOperandVal(1);
6962   Src = Src.getOperand(0);
6963 
6964   unsigned CombinedShAmt;
6965   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
6966     CombinedShAmt = ShAmt1 | ShAmt2;
6967   else
6968     CombinedShAmt = ShAmt1 ^ ShAmt2;
6969 
6970   if (CombinedShAmt == 0)
6971     return Src;
6972 
6973   SDLoc DL(N);
6974   return DAG.getNode(
6975       N->getOpcode(), DL, N->getValueType(0), Src,
6976       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
6977 }
6978 
6979 // Combine a constant select operand into its use:
6980 //
6981 // (and (select cond, -1, c), x)
6982 //   -> (select cond, x, (and x, c))  [AllOnes=1]
6983 // (or  (select cond, 0, c), x)
6984 //   -> (select cond, x, (or x, c))  [AllOnes=0]
6985 // (xor (select cond, 0, c), x)
6986 //   -> (select cond, x, (xor x, c))  [AllOnes=0]
6987 // (add (select cond, 0, c), x)
6988 //   -> (select cond, x, (add x, c))  [AllOnes=0]
6989 // (sub x, (select cond, 0, c))
6990 //   -> (select cond, x, (sub x, c))  [AllOnes=0]
6991 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
6992                                    SelectionDAG &DAG, bool AllOnes) {
6993   EVT VT = N->getValueType(0);
6994 
6995   // Skip vectors.
6996   if (VT.isVector())
6997     return SDValue();
6998 
6999   if ((Slct.getOpcode() != ISD::SELECT &&
7000        Slct.getOpcode() != RISCVISD::SELECT_CC) ||
7001       !Slct.hasOneUse())
7002     return SDValue();
7003 
7004   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
7005     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
7006   };
7007 
7008   bool SwapSelectOps;
7009   unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
7010   SDValue TrueVal = Slct.getOperand(1 + OpOffset);
7011   SDValue FalseVal = Slct.getOperand(2 + OpOffset);
7012   SDValue NonConstantVal;
7013   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
7014     SwapSelectOps = false;
7015     NonConstantVal = FalseVal;
7016   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
7017     SwapSelectOps = true;
7018     NonConstantVal = TrueVal;
7019   } else
7020     return SDValue();
7021 
7022   // Slct is now know to be the desired identity constant when CC is true.
7023   TrueVal = OtherOp;
7024   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
7025   // Unless SwapSelectOps says the condition should be false.
7026   if (SwapSelectOps)
7027     std::swap(TrueVal, FalseVal);
7028 
7029   if (Slct.getOpcode() == RISCVISD::SELECT_CC)
7030     return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
7031                        {Slct.getOperand(0), Slct.getOperand(1),
7032                         Slct.getOperand(2), TrueVal, FalseVal});
7033 
7034   return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
7035                      {Slct.getOperand(0), TrueVal, FalseVal});
7036 }
7037 
7038 // Attempt combineSelectAndUse on each operand of a commutative operator N.
7039 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
7040                                               bool AllOnes) {
7041   SDValue N0 = N->getOperand(0);
7042   SDValue N1 = N->getOperand(1);
7043   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
7044     return Result;
7045   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
7046     return Result;
7047   return SDValue();
7048 }
7049 
7050 // Transform (add (mul x, c0), c1) ->
7051 //           (add (mul (add x, c1/c0), c0), c1%c0).
7052 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
7053 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
7054 // to an infinite loop in DAGCombine if transformed.
7055 // Or transform (add (mul x, c0), c1) ->
7056 //              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
7057 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
7058 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
7059 // lead to an infinite loop in DAGCombine if transformed.
7060 // Or transform (add (mul x, c0), c1) ->
7061 //              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
7062 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
7063 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
7064 // lead to an infinite loop in DAGCombine if transformed.
7065 // Or transform (add (mul x, c0), c1) ->
7066 //              (mul (add x, c1/c0), c0).
7067 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
7068 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
7069                                      const RISCVSubtarget &Subtarget) {
7070   // Skip for vector types and larger types.
7071   EVT VT = N->getValueType(0);
7072   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7073     return SDValue();
7074   // The first operand node must be a MUL and has no other use.
7075   SDValue N0 = N->getOperand(0);
7076   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
7077     return SDValue();
7078   // Check if c0 and c1 match above conditions.
7079   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7080   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
7081   if (!N0C || !N1C)
7082     return SDValue();
7083   int64_t C0 = N0C->getSExtValue();
7084   int64_t C1 = N1C->getSExtValue();
7085   int64_t CA, CB;
7086   if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
7087     return SDValue();
7088   // Search for proper CA (non-zero) and CB that both are simm12.
7089   if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
7090       !isInt<12>(C0 * (C1 / C0))) {
7091     CA = C1 / C0;
7092     CB = C1 % C0;
7093   } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
7094              isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
7095     CA = C1 / C0 + 1;
7096     CB = C1 % C0 - C0;
7097   } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
7098              isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
7099     CA = C1 / C0 - 1;
7100     CB = C1 % C0 + C0;
7101   } else
7102     return SDValue();
7103   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
7104   SDLoc DL(N);
7105   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
7106                              DAG.getConstant(CA, DL, VT));
7107   SDValue New1 =
7108       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
7109   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
7110 }
7111 
7112 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
7113                                  const RISCVSubtarget &Subtarget) {
7114   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
7115     return V;
7116   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
7117     return V;
7118   // fold (add (select lhs, rhs, cc, 0, y), x) ->
7119   //      (select lhs, rhs, cc, x, (add x, y))
7120   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7121 }
7122 
7123 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
7124   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
7125   //      (select lhs, rhs, cc, x, (sub x, y))
7126   SDValue N0 = N->getOperand(0);
7127   SDValue N1 = N->getOperand(1);
7128   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
7129 }
7130 
7131 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
7132   // fold (and (select lhs, rhs, cc, -1, y), x) ->
7133   //      (select lhs, rhs, cc, x, (and x, y))
7134   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
7135 }
7136 
7137 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
7138                                 const RISCVSubtarget &Subtarget) {
7139   if (Subtarget.hasStdExtZbp()) {
7140     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
7141       return GREV;
7142     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
7143       return GORC;
7144     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
7145       return SHFL;
7146   }
7147 
7148   // fold (or (select cond, 0, y), x) ->
7149   //      (select cond, x, (or x, y))
7150   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7151 }
7152 
7153 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
7154   // fold (xor (select cond, 0, y), x) ->
7155   //      (select cond, x, (xor x, y))
7156   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7157 }
7158 
7159 // Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
7160 // has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
7161 // by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
7162 // removed during type legalization leaving an ADD/SUB/MUL use that won't use
7163 // ADDW/SUBW/MULW.
7164 static SDValue performANY_EXTENDCombine(SDNode *N,
7165                                         TargetLowering::DAGCombinerInfo &DCI,
7166                                         const RISCVSubtarget &Subtarget) {
7167   if (!Subtarget.is64Bit())
7168     return SDValue();
7169 
7170   SelectionDAG &DAG = DCI.DAG;
7171 
7172   SDValue Src = N->getOperand(0);
7173   EVT VT = N->getValueType(0);
7174   if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
7175     return SDValue();
7176 
7177   // The opcode must be one that can implicitly sign_extend.
7178   // FIXME: Additional opcodes.
7179   switch (Src.getOpcode()) {
7180   default:
7181     return SDValue();
7182   case ISD::MUL:
7183     if (!Subtarget.hasStdExtM())
7184       return SDValue();
7185     LLVM_FALLTHROUGH;
7186   case ISD::ADD:
7187   case ISD::SUB:
7188     break;
7189   }
7190 
7191   // Only handle cases where the result is used by a CopyToReg. That likely
7192   // means the value is a liveout of the basic block. This helps prevent
7193   // infinite combine loops like PR51206.
7194   if (none_of(N->uses(),
7195               [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
7196     return SDValue();
7197 
7198   SmallVector<SDNode *, 4> SetCCs;
7199   for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
7200                             UE = Src.getNode()->use_end();
7201        UI != UE; ++UI) {
7202     SDNode *User = *UI;
7203     if (User == N)
7204       continue;
7205     if (UI.getUse().getResNo() != Src.getResNo())
7206       continue;
7207     // All i32 setccs are legalized by sign extending operands.
7208     if (User->getOpcode() == ISD::SETCC) {
7209       SetCCs.push_back(User);
7210       continue;
7211     }
7212     // We don't know if we can extend this user.
7213     break;
7214   }
7215 
7216   // If we don't have any SetCCs, this isn't worthwhile.
7217   if (SetCCs.empty())
7218     return SDValue();
7219 
7220   SDLoc DL(N);
7221   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
7222   DCI.CombineTo(N, SExt);
7223 
7224   // Promote all the setccs.
7225   for (SDNode *SetCC : SetCCs) {
7226     SmallVector<SDValue, 4> Ops;
7227 
7228     for (unsigned j = 0; j != 2; ++j) {
7229       SDValue SOp = SetCC->getOperand(j);
7230       if (SOp == Src)
7231         Ops.push_back(SExt);
7232       else
7233         Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
7234     }
7235 
7236     Ops.push_back(SetCC->getOperand(2));
7237     DCI.CombineTo(SetCC,
7238                   DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7239   }
7240   return SDValue(N, 0);
7241 }
7242 
7243 // Try to form VWMUL or VWMULU.
7244 // FIXME: Support VWMULSU.
7245 static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
7246                                     SelectionDAG &DAG) {
7247   assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
7248   bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
7249   bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
7250   if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
7251     return SDValue();
7252 
7253   SDValue Mask = N->getOperand(2);
7254   SDValue VL = N->getOperand(3);
7255 
7256   // Make sure the mask and VL match.
7257   if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
7258     return SDValue();
7259 
7260   MVT VT = N->getSimpleValueType(0);
7261 
7262   // Determine the narrow size for a widening multiply.
7263   unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
7264   MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
7265                                   VT.getVectorElementCount());
7266 
7267   SDLoc DL(N);
7268 
7269   // See if the other operand is the same opcode.
7270   if (Op0.getOpcode() == Op1.getOpcode()) {
7271     if (!Op1.hasOneUse())
7272       return SDValue();
7273 
7274     // Make sure the mask and VL match.
7275     if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
7276       return SDValue();
7277 
7278     Op1 = Op1.getOperand(0);
7279   } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
7280     // The operand is a splat of a scalar.
7281 
7282     // The VL must be the same.
7283     if (Op1.getOperand(1) != VL)
7284       return SDValue();
7285 
7286     // Get the scalar value.
7287     Op1 = Op1.getOperand(0);
7288 
7289     // See if have enough sign bits or zero bits in the scalar to use a
7290     // widening multiply by splatting to smaller element size.
7291     unsigned EltBits = VT.getScalarSizeInBits();
7292     unsigned ScalarBits = Op1.getValueSizeInBits();
7293     // Make sure we're getting all element bits from the scalar register.
7294     // FIXME: Support implicit sign extension of vmv.v.x?
7295     if (ScalarBits < EltBits)
7296       return SDValue();
7297 
7298     if (IsSignExt) {
7299       if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
7300         return SDValue();
7301     } else {
7302       APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
7303       if (!DAG.MaskedValueIsZero(Op1, Mask))
7304         return SDValue();
7305     }
7306 
7307     Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
7308   } else
7309     return SDValue();
7310 
7311   Op0 = Op0.getOperand(0);
7312 
7313   // Re-introduce narrower extends if needed.
7314   unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
7315   if (Op0.getValueType() != NarrowVT)
7316     Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
7317   if (Op1.getValueType() != NarrowVT)
7318     Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
7319 
7320   unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
7321   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
7322 }
7323 
7324 static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
7325   switch (Op.getOpcode()) {
7326   case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
7327   case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
7328   case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
7329   case ISD::FCEIL:      return RISCVFPRndMode::RUP;
7330   case ISD::FROUND:     return RISCVFPRndMode::RMM;
7331   }
7332 
7333   return RISCVFPRndMode::Invalid;
7334 }
7335 
7336 // Fold
7337 //   (fp_to_int (froundeven X)) -> fcvt X, rne
7338 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
7339 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
7340 //   (fp_to_int (fceil X))      -> fcvt X, rup
7341 //   (fp_to_int (fround X))     -> fcvt X, rmm
7342 static SDValue performFP_TO_INTCombine(SDNode *N,
7343                                        TargetLowering::DAGCombinerInfo &DCI,
7344                                        const RISCVSubtarget &Subtarget) {
7345   SelectionDAG &DAG = DCI.DAG;
7346   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7347   MVT XLenVT = Subtarget.getXLenVT();
7348 
7349   // Only handle XLen or i32 types. Other types narrower than XLen will
7350   // eventually be legalized to XLenVT.
7351   EVT VT = N->getValueType(0);
7352   if (VT != MVT::i32 && VT != XLenVT)
7353     return SDValue();
7354 
7355   SDValue Src = N->getOperand(0);
7356 
7357   // Ensure the FP type is also legal.
7358   if (!TLI.isTypeLegal(Src.getValueType()))
7359     return SDValue();
7360 
7361   // Don't do this for f16 with Zfhmin and not Zfh.
7362   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
7363     return SDValue();
7364 
7365   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
7366   if (FRM == RISCVFPRndMode::Invalid)
7367     return SDValue();
7368 
7369   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
7370 
7371   unsigned Opc;
7372   if (VT == XLenVT)
7373     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
7374   else
7375     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7376 
7377   SDLoc DL(N);
7378   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
7379                                 DAG.getTargetConstant(FRM, DL, XLenVT));
7380   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
7381 }
7382 
7383 // Fold
7384 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
7385 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
7386 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
7387 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
7388 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
7389 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
7390                                        TargetLowering::DAGCombinerInfo &DCI,
7391                                        const RISCVSubtarget &Subtarget) {
7392   SelectionDAG &DAG = DCI.DAG;
7393   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7394   MVT XLenVT = Subtarget.getXLenVT();
7395 
7396   // Only handle XLen types. Other types narrower than XLen will eventually be
7397   // legalized to XLenVT.
7398   EVT DstVT = N->getValueType(0);
7399   if (DstVT != XLenVT)
7400     return SDValue();
7401 
7402   SDValue Src = N->getOperand(0);
7403 
7404   // Ensure the FP type is also legal.
7405   if (!TLI.isTypeLegal(Src.getValueType()))
7406     return SDValue();
7407 
7408   // Don't do this for f16 with Zfhmin and not Zfh.
7409   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
7410     return SDValue();
7411 
7412   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7413 
7414   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
7415   if (FRM == RISCVFPRndMode::Invalid)
7416     return SDValue();
7417 
7418   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
7419 
7420   unsigned Opc;
7421   if (SatVT == DstVT)
7422     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
7423   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
7424     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7425   else
7426     return SDValue();
7427   // FIXME: Support other SatVTs by clamping before or after the conversion.
7428 
7429   Src = Src.getOperand(0);
7430 
7431   SDLoc DL(N);
7432   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
7433                                 DAG.getTargetConstant(FRM, DL, XLenVT));
7434 
7435   // RISCV FP-to-int conversions saturate to the destination register size, but
7436   // don't produce 0 for nan.
7437   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
7438   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
7439 }
7440 
7441 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
7442                                                DAGCombinerInfo &DCI) const {
7443   SelectionDAG &DAG = DCI.DAG;
7444 
7445   // Helper to call SimplifyDemandedBits on an operand of N where only some low
7446   // bits are demanded. N will be added to the Worklist if it was not deleted.
7447   // Caller should return SDValue(N, 0) if this returns true.
7448   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
7449     SDValue Op = N->getOperand(OpNo);
7450     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
7451     if (!SimplifyDemandedBits(Op, Mask, DCI))
7452       return false;
7453 
7454     if (N->getOpcode() != ISD::DELETED_NODE)
7455       DCI.AddToWorklist(N);
7456     return true;
7457   };
7458 
7459   switch (N->getOpcode()) {
7460   default:
7461     break;
7462   case RISCVISD::SplitF64: {
7463     SDValue Op0 = N->getOperand(0);
7464     // If the input to SplitF64 is just BuildPairF64 then the operation is
7465     // redundant. Instead, use BuildPairF64's operands directly.
7466     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
7467       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7468 
7469     SDLoc DL(N);
7470 
7471     // It's cheaper to materialise two 32-bit integers than to load a double
7472     // from the constant pool and transfer it to integer registers through the
7473     // stack.
7474     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
7475       APInt V = C->getValueAPF().bitcastToAPInt();
7476       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7477       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7478       return DCI.CombineTo(N, Lo, Hi);
7479     }
7480 
7481     // This is a target-specific version of a DAGCombine performed in
7482     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7483     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7484     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7485     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7486         !Op0.getNode()->hasOneUse())
7487       break;
7488     SDValue NewSplitF64 =
7489         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
7490                     Op0.getOperand(0));
7491     SDValue Lo = NewSplitF64.getValue(0);
7492     SDValue Hi = NewSplitF64.getValue(1);
7493     APInt SignBit = APInt::getSignMask(32);
7494     if (Op0.getOpcode() == ISD::FNEG) {
7495       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
7496                                   DAG.getConstant(SignBit, DL, MVT::i32));
7497       return DCI.CombineTo(N, Lo, NewHi);
7498     }
7499     assert(Op0.getOpcode() == ISD::FABS);
7500     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
7501                                 DAG.getConstant(~SignBit, DL, MVT::i32));
7502     return DCI.CombineTo(N, Lo, NewHi);
7503   }
7504   case RISCVISD::SLLW:
7505   case RISCVISD::SRAW:
7506   case RISCVISD::SRLW:
7507   case RISCVISD::ROLW:
7508   case RISCVISD::RORW: {
7509     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7510     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7511         SimplifyDemandedLowBitsHelper(1, 5))
7512       return SDValue(N, 0);
7513     break;
7514   }
7515   case RISCVISD::CLZW:
7516   case RISCVISD::CTZW: {
7517     // Only the lower 32 bits of the first operand are read
7518     if (SimplifyDemandedLowBitsHelper(0, 32))
7519       return SDValue(N, 0);
7520     break;
7521   }
7522   case RISCVISD::GREV:
7523   case RISCVISD::GORC: {
7524     // Only the lower log2(Bitwidth) bits of the the shift amount are read.
7525     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7526     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7527     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
7528       return SDValue(N, 0);
7529 
7530     return combineGREVI_GORCI(N, DAG);
7531   }
7532   case RISCVISD::GREVW:
7533   case RISCVISD::GORCW: {
7534     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7535     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7536         SimplifyDemandedLowBitsHelper(1, 5))
7537       return SDValue(N, 0);
7538 
7539     return combineGREVI_GORCI(N, DAG);
7540   }
7541   case RISCVISD::SHFL:
7542   case RISCVISD::UNSHFL: {
7543     // Only the lower log2(Bitwidth)-1 bits of the the shift amount are read.
7544     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7545     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7546     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
7547       return SDValue(N, 0);
7548 
7549     break;
7550   }
7551   case RISCVISD::SHFLW:
7552   case RISCVISD::UNSHFLW: {
7553     // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
7554     SDValue LHS = N->getOperand(0);
7555     SDValue RHS = N->getOperand(1);
7556     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
7557     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
7558     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7559         SimplifyDemandedLowBitsHelper(1, 4))
7560       return SDValue(N, 0);
7561 
7562     break;
7563   }
7564   case RISCVISD::BCOMPRESSW:
7565   case RISCVISD::BDECOMPRESSW: {
7566     // Only the lower 32 bits of LHS and RHS are read.
7567     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7568         SimplifyDemandedLowBitsHelper(1, 32))
7569       return SDValue(N, 0);
7570 
7571     break;
7572   }
7573   case RISCVISD::FMV_X_ANYEXTH:
7574   case RISCVISD::FMV_X_ANYEXTW_RV64: {
7575     SDLoc DL(N);
7576     SDValue Op0 = N->getOperand(0);
7577     MVT VT = N->getSimpleValueType(0);
7578     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
7579     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
7580     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
7581     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
7582          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
7583         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
7584          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
7585       assert(Op0.getOperand(0).getValueType() == VT &&
7586              "Unexpected value type!");
7587       return Op0.getOperand(0);
7588     }
7589 
7590     // This is a target-specific version of a DAGCombine performed in
7591     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7592     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7593     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7594     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7595         !Op0.getNode()->hasOneUse())
7596       break;
7597     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
7598     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
7599     APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
7600     if (Op0.getOpcode() == ISD::FNEG)
7601       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
7602                          DAG.getConstant(SignBit, DL, VT));
7603 
7604     assert(Op0.getOpcode() == ISD::FABS);
7605     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
7606                        DAG.getConstant(~SignBit, DL, VT));
7607   }
7608   case ISD::ADD:
7609     return performADDCombine(N, DAG, Subtarget);
7610   case ISD::SUB:
7611     return performSUBCombine(N, DAG);
7612   case ISD::AND:
7613     return performANDCombine(N, DAG);
7614   case ISD::OR:
7615     return performORCombine(N, DAG, Subtarget);
7616   case ISD::XOR:
7617     return performXORCombine(N, DAG);
7618   case ISD::ANY_EXTEND:
7619     return performANY_EXTENDCombine(N, DCI, Subtarget);
7620   case ISD::ZERO_EXTEND:
7621     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
7622     // type legalization. This is safe because fp_to_uint produces poison if
7623     // it overflows.
7624     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
7625       SDValue Src = N->getOperand(0);
7626       if (Src.getOpcode() == ISD::FP_TO_UINT &&
7627           isTypeLegal(Src.getOperand(0).getValueType()))
7628         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
7629                            Src.getOperand(0));
7630       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
7631           isTypeLegal(Src.getOperand(1).getValueType())) {
7632         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
7633         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
7634                                   Src.getOperand(0), Src.getOperand(1));
7635         DCI.CombineTo(N, Res);
7636         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
7637         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
7638         return SDValue(N, 0); // Return N so it doesn't get rechecked.
7639       }
7640     }
7641     return SDValue();
7642   case RISCVISD::SELECT_CC: {
7643     // Transform
7644     SDValue LHS = N->getOperand(0);
7645     SDValue RHS = N->getOperand(1);
7646     SDValue TrueV = N->getOperand(3);
7647     SDValue FalseV = N->getOperand(4);
7648 
7649     // If the True and False values are the same, we don't need a select_cc.
7650     if (TrueV == FalseV)
7651       return TrueV;
7652 
7653     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
7654     if (!ISD::isIntEqualitySetCC(CCVal))
7655       break;
7656 
7657     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
7658     //      (select_cc X, Y, lt, trueV, falseV)
7659     // Sometimes the setcc is introduced after select_cc has been formed.
7660     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7661         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
7662       // If we're looking for eq 0 instead of ne 0, we need to invert the
7663       // condition.
7664       bool Invert = CCVal == ISD::SETEQ;
7665       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7666       if (Invert)
7667         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7668 
7669       SDLoc DL(N);
7670       RHS = LHS.getOperand(1);
7671       LHS = LHS.getOperand(0);
7672       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7673 
7674       SDValue TargetCC = DAG.getCondCode(CCVal);
7675       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
7676                          {LHS, RHS, TargetCC, TrueV, FalseV});
7677     }
7678 
7679     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
7680     //      (select_cc X, Y, eq/ne, trueV, falseV)
7681     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
7682       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
7683                          {LHS.getOperand(0), LHS.getOperand(1),
7684                           N->getOperand(2), TrueV, FalseV});
7685     // (select_cc X, 1, setne, trueV, falseV) ->
7686     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
7687     // This can occur when legalizing some floating point comparisons.
7688     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7689     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7690       SDLoc DL(N);
7691       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7692       SDValue TargetCC = DAG.getCondCode(CCVal);
7693       RHS = DAG.getConstant(0, DL, LHS.getValueType());
7694       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
7695                          {LHS, RHS, TargetCC, TrueV, FalseV});
7696     }
7697 
7698     break;
7699   }
7700   case RISCVISD::BR_CC: {
7701     SDValue LHS = N->getOperand(1);
7702     SDValue RHS = N->getOperand(2);
7703     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
7704     if (!ISD::isIntEqualitySetCC(CCVal))
7705       break;
7706 
7707     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
7708     //      (br_cc X, Y, lt, dest)
7709     // Sometimes the setcc is introduced after br_cc has been formed.
7710     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7711         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
7712       // If we're looking for eq 0 instead of ne 0, we need to invert the
7713       // condition.
7714       bool Invert = CCVal == ISD::SETEQ;
7715       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7716       if (Invert)
7717         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7718 
7719       SDLoc DL(N);
7720       RHS = LHS.getOperand(1);
7721       LHS = LHS.getOperand(0);
7722       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7723 
7724       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
7725                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
7726                          N->getOperand(4));
7727     }
7728 
7729     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
7730     //      (br_cc X, Y, eq/ne, trueV, falseV)
7731     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
7732       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
7733                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
7734                          N->getOperand(3), N->getOperand(4));
7735 
7736     // (br_cc X, 1, setne, br_cc) ->
7737     // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
7738     // This can occur when legalizing some floating point comparisons.
7739     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7740     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7741       SDLoc DL(N);
7742       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7743       SDValue TargetCC = DAG.getCondCode(CCVal);
7744       RHS = DAG.getConstant(0, DL, LHS.getValueType());
7745       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
7746                          N->getOperand(0), LHS, RHS, TargetCC,
7747                          N->getOperand(4));
7748     }
7749     break;
7750   }
7751   case ISD::FP_TO_SINT:
7752   case ISD::FP_TO_UINT:
7753     return performFP_TO_INTCombine(N, DCI, Subtarget);
7754   case ISD::FP_TO_SINT_SAT:
7755   case ISD::FP_TO_UINT_SAT:
7756     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
7757   case ISD::FCOPYSIGN: {
7758     EVT VT = N->getValueType(0);
7759     if (!VT.isVector())
7760       break;
7761     // There is a form of VFSGNJ which injects the negated sign of its second
7762     // operand. Try and bubble any FNEG up after the extend/round to produce
7763     // this optimized pattern. Avoid modifying cases where FP_ROUND and
7764     // TRUNC=1.
7765     SDValue In2 = N->getOperand(1);
7766     // Avoid cases where the extend/round has multiple uses, as duplicating
7767     // those is typically more expensive than removing a fneg.
7768     if (!In2.hasOneUse())
7769       break;
7770     if (In2.getOpcode() != ISD::FP_EXTEND &&
7771         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
7772       break;
7773     In2 = In2.getOperand(0);
7774     if (In2.getOpcode() != ISD::FNEG)
7775       break;
7776     SDLoc DL(N);
7777     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
7778     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
7779                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
7780   }
7781   case ISD::MGATHER:
7782   case ISD::MSCATTER:
7783   case ISD::VP_GATHER:
7784   case ISD::VP_SCATTER: {
7785     if (!DCI.isBeforeLegalize())
7786       break;
7787     SDValue Index, ScaleOp;
7788     bool IsIndexScaled = false;
7789     bool IsIndexSigned = false;
7790     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
7791       Index = VPGSN->getIndex();
7792       ScaleOp = VPGSN->getScale();
7793       IsIndexScaled = VPGSN->isIndexScaled();
7794       IsIndexSigned = VPGSN->isIndexSigned();
7795     } else {
7796       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
7797       Index = MGSN->getIndex();
7798       ScaleOp = MGSN->getScale();
7799       IsIndexScaled = MGSN->isIndexScaled();
7800       IsIndexSigned = MGSN->isIndexSigned();
7801     }
7802     EVT IndexVT = Index.getValueType();
7803     MVT XLenVT = Subtarget.getXLenVT();
7804     // RISCV indexed loads only support the "unsigned unscaled" addressing
7805     // mode, so anything else must be manually legalized.
7806     bool NeedsIdxLegalization =
7807         IsIndexScaled ||
7808         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
7809     if (!NeedsIdxLegalization)
7810       break;
7811 
7812     SDLoc DL(N);
7813 
7814     // Any index legalization should first promote to XLenVT, so we don't lose
7815     // bits when scaling. This may create an illegal index type so we let
7816     // LLVM's legalization take care of the splitting.
7817     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
7818     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
7819       IndexVT = IndexVT.changeVectorElementType(XLenVT);
7820       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
7821                           DL, IndexVT, Index);
7822     }
7823 
7824     unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
7825     if (IsIndexScaled && Scale != 1) {
7826       // Manually scale the indices by the element size.
7827       // TODO: Sanitize the scale operand here?
7828       // TODO: For VP nodes, should we use VP_SHL here?
7829       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
7830       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
7831       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
7832     }
7833 
7834     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
7835     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
7836       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
7837                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
7838                               VPGN->getScale(), VPGN->getMask(),
7839                               VPGN->getVectorLength()},
7840                              VPGN->getMemOperand(), NewIndexTy);
7841     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
7842       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
7843                               {VPSN->getChain(), VPSN->getValue(),
7844                                VPSN->getBasePtr(), Index, VPSN->getScale(),
7845                                VPSN->getMask(), VPSN->getVectorLength()},
7846                               VPSN->getMemOperand(), NewIndexTy);
7847     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
7848       return DAG.getMaskedGather(
7849           N->getVTList(), MGN->getMemoryVT(), DL,
7850           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
7851            MGN->getBasePtr(), Index, MGN->getScale()},
7852           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
7853     const auto *MSN = cast<MaskedScatterSDNode>(N);
7854     return DAG.getMaskedScatter(
7855         N->getVTList(), MSN->getMemoryVT(), DL,
7856         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
7857          Index, MSN->getScale()},
7858         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
7859   }
7860   case RISCVISD::SRA_VL:
7861   case RISCVISD::SRL_VL:
7862   case RISCVISD::SHL_VL: {
7863     SDValue ShAmt = N->getOperand(1);
7864     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7865       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7866       SDLoc DL(N);
7867       SDValue VL = N->getOperand(3);
7868       EVT VT = N->getValueType(0);
7869       ShAmt =
7870           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
7871       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
7872                          N->getOperand(2), N->getOperand(3));
7873     }
7874     break;
7875   }
7876   case ISD::SRA:
7877   case ISD::SRL:
7878   case ISD::SHL: {
7879     SDValue ShAmt = N->getOperand(1);
7880     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7881       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7882       SDLoc DL(N);
7883       EVT VT = N->getValueType(0);
7884       ShAmt =
7885           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
7886       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
7887     }
7888     break;
7889   }
7890   case RISCVISD::MUL_VL: {
7891     SDValue Op0 = N->getOperand(0);
7892     SDValue Op1 = N->getOperand(1);
7893     if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
7894       return V;
7895     if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
7896       return V;
7897     return SDValue();
7898   }
7899   case ISD::STORE: {
7900     auto *Store = cast<StoreSDNode>(N);
7901     SDValue Val = Store->getValue();
7902     // Combine store of vmv.x.s to vse with VL of 1.
7903     // FIXME: Support FP.
7904     if (Val.getOpcode() == RISCVISD::VMV_X_S) {
7905       SDValue Src = Val.getOperand(0);
7906       EVT VecVT = Src.getValueType();
7907       EVT MemVT = Store->getMemoryVT();
7908       // The memory VT and the element type must match.
7909       if (VecVT.getVectorElementType() == MemVT) {
7910         SDLoc DL(N);
7911         MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
7912         return DAG.getStoreVP(
7913             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
7914             DAG.getConstant(1, DL, MaskVT),
7915             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
7916             Store->getMemOperand(), Store->getAddressingMode(),
7917             Store->isTruncatingStore(), /*IsCompress*/ false);
7918       }
7919     }
7920 
7921     break;
7922   }
7923   }
7924 
7925   return SDValue();
7926 }
7927 
7928 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
7929     const SDNode *N, CombineLevel Level) const {
7930   // The following folds are only desirable if `(OP _, c1 << c2)` can be
7931   // materialised in fewer instructions than `(OP _, c1)`:
7932   //
7933   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7934   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7935   SDValue N0 = N->getOperand(0);
7936   EVT Ty = N0.getValueType();
7937   if (Ty.isScalarInteger() &&
7938       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
7939     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7940     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
7941     if (C1 && C2) {
7942       const APInt &C1Int = C1->getAPIntValue();
7943       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
7944 
7945       // We can materialise `c1 << c2` into an add immediate, so it's "free",
7946       // and the combine should happen, to potentially allow further combines
7947       // later.
7948       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
7949           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
7950         return true;
7951 
7952       // We can materialise `c1` in an add immediate, so it's "free", and the
7953       // combine should be prevented.
7954       if (C1Int.getMinSignedBits() <= 64 &&
7955           isLegalAddImmediate(C1Int.getSExtValue()))
7956         return false;
7957 
7958       // Neither constant will fit into an immediate, so find materialisation
7959       // costs.
7960       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
7961                                               Subtarget.getFeatureBits(),
7962                                               /*CompressionCost*/true);
7963       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
7964           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
7965           /*CompressionCost*/true);
7966 
7967       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
7968       // combine should be prevented.
7969       if (C1Cost < ShiftedC1Cost)
7970         return false;
7971     }
7972   }
7973   return true;
7974 }
7975 
7976 bool RISCVTargetLowering::targetShrinkDemandedConstant(
7977     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
7978     TargetLoweringOpt &TLO) const {
7979   // Delay this optimization as late as possible.
7980   if (!TLO.LegalOps)
7981     return false;
7982 
7983   EVT VT = Op.getValueType();
7984   if (VT.isVector())
7985     return false;
7986 
7987   // Only handle AND for now.
7988   if (Op.getOpcode() != ISD::AND)
7989     return false;
7990 
7991   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7992   if (!C)
7993     return false;
7994 
7995   const APInt &Mask = C->getAPIntValue();
7996 
7997   // Clear all non-demanded bits initially.
7998   APInt ShrunkMask = Mask & DemandedBits;
7999 
8000   // Try to make a smaller immediate by setting undemanded bits.
8001 
8002   APInt ExpandedMask = Mask | ~DemandedBits;
8003 
8004   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
8005     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
8006   };
8007   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
8008     if (NewMask == Mask)
8009       return true;
8010     SDLoc DL(Op);
8011     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
8012     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
8013     return TLO.CombineTo(Op, NewOp);
8014   };
8015 
8016   // If the shrunk mask fits in sign extended 12 bits, let the target
8017   // independent code apply it.
8018   if (ShrunkMask.isSignedIntN(12))
8019     return false;
8020 
8021   // Preserve (and X, 0xffff) when zext.h is supported.
8022   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
8023     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
8024     if (IsLegalMask(NewMask))
8025       return UseMask(NewMask);
8026   }
8027 
8028   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
8029   if (VT == MVT::i64) {
8030     APInt NewMask = APInt(64, 0xffffffff);
8031     if (IsLegalMask(NewMask))
8032       return UseMask(NewMask);
8033   }
8034 
8035   // For the remaining optimizations, we need to be able to make a negative
8036   // number through a combination of mask and undemanded bits.
8037   if (!ExpandedMask.isNegative())
8038     return false;
8039 
8040   // What is the fewest number of bits we need to represent the negative number.
8041   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
8042 
8043   // Try to make a 12 bit negative immediate. If that fails try to make a 32
8044   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
8045   APInt NewMask = ShrunkMask;
8046   if (MinSignedBits <= 12)
8047     NewMask.setBitsFrom(11);
8048   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
8049     NewMask.setBitsFrom(31);
8050   else
8051     return false;
8052 
8053   // Check that our new mask is a subset of the demanded mask.
8054   assert(IsLegalMask(NewMask));
8055   return UseMask(NewMask);
8056 }
8057 
8058 static void computeGREV(APInt &Src, unsigned ShAmt) {
8059   ShAmt &= Src.getBitWidth() - 1;
8060   uint64_t x = Src.getZExtValue();
8061   if (ShAmt & 1)
8062     x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
8063   if (ShAmt & 2)
8064     x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
8065   if (ShAmt & 4)
8066     x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
8067   if (ShAmt & 8)
8068     x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
8069   if (ShAmt & 16)
8070     x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
8071   if (ShAmt & 32)
8072     x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
8073   Src = x;
8074 }
8075 
8076 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8077                                                         KnownBits &Known,
8078                                                         const APInt &DemandedElts,
8079                                                         const SelectionDAG &DAG,
8080                                                         unsigned Depth) const {
8081   unsigned BitWidth = Known.getBitWidth();
8082   unsigned Opc = Op.getOpcode();
8083   assert((Opc >= ISD::BUILTIN_OP_END ||
8084           Opc == ISD::INTRINSIC_WO_CHAIN ||
8085           Opc == ISD::INTRINSIC_W_CHAIN ||
8086           Opc == ISD::INTRINSIC_VOID) &&
8087          "Should use MaskedValueIsZero if you don't know whether Op"
8088          " is a target node!");
8089 
8090   Known.resetAll();
8091   switch (Opc) {
8092   default: break;
8093   case RISCVISD::SELECT_CC: {
8094     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
8095     // If we don't know any bits, early out.
8096     if (Known.isUnknown())
8097       break;
8098     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
8099 
8100     // Only known if known in both the LHS and RHS.
8101     Known = KnownBits::commonBits(Known, Known2);
8102     break;
8103   }
8104   case RISCVISD::REMUW: {
8105     KnownBits Known2;
8106     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
8107     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
8108     // We only care about the lower 32 bits.
8109     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
8110     // Restore the original width by sign extending.
8111     Known = Known.sext(BitWidth);
8112     break;
8113   }
8114   case RISCVISD::DIVUW: {
8115     KnownBits Known2;
8116     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
8117     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
8118     // We only care about the lower 32 bits.
8119     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
8120     // Restore the original width by sign extending.
8121     Known = Known.sext(BitWidth);
8122     break;
8123   }
8124   case RISCVISD::CTZW: {
8125     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8126     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
8127     unsigned LowBits = Log2_32(PossibleTZ) + 1;
8128     Known.Zero.setBitsFrom(LowBits);
8129     break;
8130   }
8131   case RISCVISD::CLZW: {
8132     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8133     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
8134     unsigned LowBits = Log2_32(PossibleLZ) + 1;
8135     Known.Zero.setBitsFrom(LowBits);
8136     break;
8137   }
8138   case RISCVISD::GREV:
8139   case RISCVISD::GREVW: {
8140     if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8141       Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
8142       if (Opc == RISCVISD::GREVW)
8143         Known = Known.trunc(32);
8144       unsigned ShAmt = C->getZExtValue();
8145       computeGREV(Known.Zero, ShAmt);
8146       computeGREV(Known.One, ShAmt);
8147       if (Opc == RISCVISD::GREVW)
8148         Known = Known.sext(BitWidth);
8149     }
8150     break;
8151   }
8152   case RISCVISD::READ_VLENB:
8153     // We assume VLENB is at least 16 bytes.
8154     Known.Zero.setLowBits(4);
8155     // We assume VLENB is no more than 65536 / 8 bytes.
8156     Known.Zero.setBitsFrom(14);
8157     break;
8158   case ISD::INTRINSIC_W_CHAIN: {
8159     unsigned IntNo = Op.getConstantOperandVal(1);
8160     switch (IntNo) {
8161     default:
8162       // We can't do anything for most intrinsics.
8163       break;
8164     case Intrinsic::riscv_vsetvli:
8165     case Intrinsic::riscv_vsetvlimax:
8166       // Assume that VL output is positive and would fit in an int32_t.
8167       // TODO: VLEN might be capped at 16 bits in a future V spec update.
8168       if (BitWidth >= 32)
8169         Known.Zero.setBitsFrom(31);
8170       break;
8171     }
8172     break;
8173   }
8174   }
8175 }
8176 
8177 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
8178     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8179     unsigned Depth) const {
8180   switch (Op.getOpcode()) {
8181   default:
8182     break;
8183   case RISCVISD::SELECT_CC: {
8184     unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
8185     if (Tmp == 1) return 1;  // Early out.
8186     unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
8187     return std::min(Tmp, Tmp2);
8188   }
8189   case RISCVISD::SLLW:
8190   case RISCVISD::SRAW:
8191   case RISCVISD::SRLW:
8192   case RISCVISD::DIVW:
8193   case RISCVISD::DIVUW:
8194   case RISCVISD::REMUW:
8195   case RISCVISD::ROLW:
8196   case RISCVISD::RORW:
8197   case RISCVISD::GREVW:
8198   case RISCVISD::GORCW:
8199   case RISCVISD::FSLW:
8200   case RISCVISD::FSRW:
8201   case RISCVISD::SHFLW:
8202   case RISCVISD::UNSHFLW:
8203   case RISCVISD::BCOMPRESSW:
8204   case RISCVISD::BDECOMPRESSW:
8205   case RISCVISD::BFPW:
8206   case RISCVISD::FCVT_W_RV64:
8207   case RISCVISD::FCVT_WU_RV64:
8208   case RISCVISD::STRICT_FCVT_W_RV64:
8209   case RISCVISD::STRICT_FCVT_WU_RV64:
8210     // TODO: As the result is sign-extended, this is conservatively correct. A
8211     // more precise answer could be calculated for SRAW depending on known
8212     // bits in the shift amount.
8213     return 33;
8214   case RISCVISD::SHFL:
8215   case RISCVISD::UNSHFL: {
8216     // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
8217     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
8218     // will stay within the upper 32 bits. If there were more than 32 sign bits
8219     // before there will be at least 33 sign bits after.
8220     if (Op.getValueType() == MVT::i64 &&
8221         isa<ConstantSDNode>(Op.getOperand(1)) &&
8222         (Op.getConstantOperandVal(1) & 0x10) == 0) {
8223       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
8224       if (Tmp > 32)
8225         return 33;
8226     }
8227     break;
8228   }
8229   case RISCVISD::VMV_X_S:
8230     // The number of sign bits of the scalar result is computed by obtaining the
8231     // element type of the input vector operand, subtracting its width from the
8232     // XLEN, and then adding one (sign bit within the element type). If the
8233     // element type is wider than XLen, the least-significant XLEN bits are
8234     // taken.
8235     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
8236       return 1;
8237     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
8238   }
8239 
8240   return 1;
8241 }
8242 
8243 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
8244                                                   MachineBasicBlock *BB) {
8245   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
8246 
8247   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
8248   // Should the count have wrapped while it was being read, we need to try
8249   // again.
8250   // ...
8251   // read:
8252   // rdcycleh x3 # load high word of cycle
8253   // rdcycle  x2 # load low word of cycle
8254   // rdcycleh x4 # load high word of cycle
8255   // bne x3, x4, read # check if high word reads match, otherwise try again
8256   // ...
8257 
8258   MachineFunction &MF = *BB->getParent();
8259   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8260   MachineFunction::iterator It = ++BB->getIterator();
8261 
8262   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
8263   MF.insert(It, LoopMBB);
8264 
8265   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
8266   MF.insert(It, DoneMBB);
8267 
8268   // Transfer the remainder of BB and its successor edges to DoneMBB.
8269   DoneMBB->splice(DoneMBB->begin(), BB,
8270                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8271   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
8272 
8273   BB->addSuccessor(LoopMBB);
8274 
8275   MachineRegisterInfo &RegInfo = MF.getRegInfo();
8276   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
8277   Register LoReg = MI.getOperand(0).getReg();
8278   Register HiReg = MI.getOperand(1).getReg();
8279   DebugLoc DL = MI.getDebugLoc();
8280 
8281   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8282   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
8283       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
8284       .addReg(RISCV::X0);
8285   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
8286       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
8287       .addReg(RISCV::X0);
8288   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
8289       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
8290       .addReg(RISCV::X0);
8291 
8292   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
8293       .addReg(HiReg)
8294       .addReg(ReadAgainReg)
8295       .addMBB(LoopMBB);
8296 
8297   LoopMBB->addSuccessor(LoopMBB);
8298   LoopMBB->addSuccessor(DoneMBB);
8299 
8300   MI.eraseFromParent();
8301 
8302   return DoneMBB;
8303 }
8304 
8305 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
8306                                              MachineBasicBlock *BB) {
8307   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
8308 
8309   MachineFunction &MF = *BB->getParent();
8310   DebugLoc DL = MI.getDebugLoc();
8311   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
8312   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
8313   Register LoReg = MI.getOperand(0).getReg();
8314   Register HiReg = MI.getOperand(1).getReg();
8315   Register SrcReg = MI.getOperand(2).getReg();
8316   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
8317   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
8318 
8319   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
8320                           RI);
8321   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
8322   MachineMemOperand *MMOLo =
8323       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
8324   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
8325       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
8326   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
8327       .addFrameIndex(FI)
8328       .addImm(0)
8329       .addMemOperand(MMOLo);
8330   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
8331       .addFrameIndex(FI)
8332       .addImm(4)
8333       .addMemOperand(MMOHi);
8334   MI.eraseFromParent(); // The pseudo instruction is gone now.
8335   return BB;
8336 }
8337 
8338 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
8339                                                  MachineBasicBlock *BB) {
8340   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
8341          "Unexpected instruction");
8342 
8343   MachineFunction &MF = *BB->getParent();
8344   DebugLoc DL = MI.getDebugLoc();
8345   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
8346   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
8347   Register DstReg = MI.getOperand(0).getReg();
8348   Register LoReg = MI.getOperand(1).getReg();
8349   Register HiReg = MI.getOperand(2).getReg();
8350   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
8351   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
8352 
8353   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
8354   MachineMemOperand *MMOLo =
8355       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
8356   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
8357       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
8358   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
8359       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
8360       .addFrameIndex(FI)
8361       .addImm(0)
8362       .addMemOperand(MMOLo);
8363   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
8364       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
8365       .addFrameIndex(FI)
8366       .addImm(4)
8367       .addMemOperand(MMOHi);
8368   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
8369   MI.eraseFromParent(); // The pseudo instruction is gone now.
8370   return BB;
8371 }
8372 
8373 static bool isSelectPseudo(MachineInstr &MI) {
8374   switch (MI.getOpcode()) {
8375   default:
8376     return false;
8377   case RISCV::Select_GPR_Using_CC_GPR:
8378   case RISCV::Select_FPR16_Using_CC_GPR:
8379   case RISCV::Select_FPR32_Using_CC_GPR:
8380   case RISCV::Select_FPR64_Using_CC_GPR:
8381     return true;
8382   }
8383 }
8384 
8385 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
8386                                         unsigned RelOpcode, unsigned EqOpcode,
8387                                         const RISCVSubtarget &Subtarget) {
8388   DebugLoc DL = MI.getDebugLoc();
8389   Register DstReg = MI.getOperand(0).getReg();
8390   Register Src1Reg = MI.getOperand(1).getReg();
8391   Register Src2Reg = MI.getOperand(2).getReg();
8392   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8393   Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
8394   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
8395 
8396   // Save the current FFLAGS.
8397   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
8398 
8399   auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
8400                  .addReg(Src1Reg)
8401                  .addReg(Src2Reg);
8402   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
8403     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
8404 
8405   // Restore the FFLAGS.
8406   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
8407       .addReg(SavedFFlags, RegState::Kill);
8408 
8409   // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
8410   auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
8411                   .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
8412                   .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
8413   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
8414     MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
8415 
8416   // Erase the pseudoinstruction.
8417   MI.eraseFromParent();
8418   return BB;
8419 }
8420 
8421 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
8422                                            MachineBasicBlock *BB,
8423                                            const RISCVSubtarget &Subtarget) {
8424   // To "insert" Select_* instructions, we actually have to insert the triangle
8425   // control-flow pattern.  The incoming instructions know the destination vreg
8426   // to set, the condition code register to branch on, the true/false values to
8427   // select between, and the condcode to use to select the appropriate branch.
8428   //
8429   // We produce the following control flow:
8430   //     HeadMBB
8431   //     |  \
8432   //     |  IfFalseMBB
8433   //     | /
8434   //    TailMBB
8435   //
8436   // When we find a sequence of selects we attempt to optimize their emission
8437   // by sharing the control flow. Currently we only handle cases where we have
8438   // multiple selects with the exact same condition (same LHS, RHS and CC).
8439   // The selects may be interleaved with other instructions if the other
8440   // instructions meet some requirements we deem safe:
8441   // - They are debug instructions. Otherwise,
8442   // - They do not have side-effects, do not access memory and their inputs do
8443   //   not depend on the results of the select pseudo-instructions.
8444   // The TrueV/FalseV operands of the selects cannot depend on the result of
8445   // previous selects in the sequence.
8446   // These conditions could be further relaxed. See the X86 target for a
8447   // related approach and more information.
8448   Register LHS = MI.getOperand(1).getReg();
8449   Register RHS = MI.getOperand(2).getReg();
8450   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
8451 
8452   SmallVector<MachineInstr *, 4> SelectDebugValues;
8453   SmallSet<Register, 4> SelectDests;
8454   SelectDests.insert(MI.getOperand(0).getReg());
8455 
8456   MachineInstr *LastSelectPseudo = &MI;
8457 
8458   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8459        SequenceMBBI != E; ++SequenceMBBI) {
8460     if (SequenceMBBI->isDebugInstr())
8461       continue;
8462     else if (isSelectPseudo(*SequenceMBBI)) {
8463       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8464           SequenceMBBI->getOperand(2).getReg() != RHS ||
8465           SequenceMBBI->getOperand(3).getImm() != CC ||
8466           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8467           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8468         break;
8469       LastSelectPseudo = &*SequenceMBBI;
8470       SequenceMBBI->collectDebugValues(SelectDebugValues);
8471       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8472     } else {
8473       if (SequenceMBBI->hasUnmodeledSideEffects() ||
8474           SequenceMBBI->mayLoadOrStore())
8475         break;
8476       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8477             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8478           }))
8479         break;
8480     }
8481   }
8482 
8483   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
8484   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8485   DebugLoc DL = MI.getDebugLoc();
8486   MachineFunction::iterator I = ++BB->getIterator();
8487 
8488   MachineBasicBlock *HeadMBB = BB;
8489   MachineFunction *F = BB->getParent();
8490   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8491   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8492 
8493   F->insert(I, IfFalseMBB);
8494   F->insert(I, TailMBB);
8495 
8496   // Transfer debug instructions associated with the selects to TailMBB.
8497   for (MachineInstr *DebugInstr : SelectDebugValues) {
8498     TailMBB->push_back(DebugInstr->removeFromParent());
8499   }
8500 
8501   // Move all instructions after the sequence to TailMBB.
8502   TailMBB->splice(TailMBB->end(), HeadMBB,
8503                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
8504   // Update machine-CFG edges by transferring all successors of the current
8505   // block to the new block which will contain the Phi nodes for the selects.
8506   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
8507   // Set the successors for HeadMBB.
8508   HeadMBB->addSuccessor(IfFalseMBB);
8509   HeadMBB->addSuccessor(TailMBB);
8510 
8511   // Insert appropriate branch.
8512   BuildMI(HeadMBB, DL, TII.getBrCond(CC))
8513     .addReg(LHS)
8514     .addReg(RHS)
8515     .addMBB(TailMBB);
8516 
8517   // IfFalseMBB just falls through to TailMBB.
8518   IfFalseMBB->addSuccessor(TailMBB);
8519 
8520   // Create PHIs for all of the select pseudo-instructions.
8521   auto SelectMBBI = MI.getIterator();
8522   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
8523   auto InsertionPoint = TailMBB->begin();
8524   while (SelectMBBI != SelectEnd) {
8525     auto Next = std::next(SelectMBBI);
8526     if (isSelectPseudo(*SelectMBBI)) {
8527       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
8528       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
8529               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
8530           .addReg(SelectMBBI->getOperand(4).getReg())
8531           .addMBB(HeadMBB)
8532           .addReg(SelectMBBI->getOperand(5).getReg())
8533           .addMBB(IfFalseMBB);
8534       SelectMBBI->eraseFromParent();
8535     }
8536     SelectMBBI = Next;
8537   }
8538 
8539   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8540   return TailMBB;
8541 }
8542 
8543 MachineBasicBlock *
8544 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
8545                                                  MachineBasicBlock *BB) const {
8546   switch (MI.getOpcode()) {
8547   default:
8548     llvm_unreachable("Unexpected instr type to insert");
8549   case RISCV::ReadCycleWide:
8550     assert(!Subtarget.is64Bit() &&
8551            "ReadCycleWrite is only to be used on riscv32");
8552     return emitReadCycleWidePseudo(MI, BB);
8553   case RISCV::Select_GPR_Using_CC_GPR:
8554   case RISCV::Select_FPR16_Using_CC_GPR:
8555   case RISCV::Select_FPR32_Using_CC_GPR:
8556   case RISCV::Select_FPR64_Using_CC_GPR:
8557     return emitSelectPseudo(MI, BB, Subtarget);
8558   case RISCV::BuildPairF64Pseudo:
8559     return emitBuildPairF64Pseudo(MI, BB);
8560   case RISCV::SplitF64Pseudo:
8561     return emitSplitF64Pseudo(MI, BB);
8562   case RISCV::PseudoQuietFLE_H:
8563     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
8564   case RISCV::PseudoQuietFLT_H:
8565     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
8566   case RISCV::PseudoQuietFLE_S:
8567     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
8568   case RISCV::PseudoQuietFLT_S:
8569     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
8570   case RISCV::PseudoQuietFLE_D:
8571     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
8572   case RISCV::PseudoQuietFLT_D:
8573     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
8574   }
8575 }
8576 
8577 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
8578                                                         SDNode *Node) const {
8579   // Add FRM dependency to any instructions with dynamic rounding mode.
8580   unsigned Opc = MI.getOpcode();
8581   auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
8582   if (Idx < 0)
8583     return;
8584   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
8585     return;
8586   // If the instruction already reads FRM, don't add another read.
8587   if (MI.readsRegister(RISCV::FRM))
8588     return;
8589   MI.addOperand(
8590       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
8591 }
8592 
8593 // Calling Convention Implementation.
8594 // The expectations for frontend ABI lowering vary from target to target.
8595 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
8596 // details, but this is a longer term goal. For now, we simply try to keep the
8597 // role of the frontend as simple and well-defined as possible. The rules can
8598 // be summarised as:
8599 // * Never split up large scalar arguments. We handle them here.
8600 // * If a hardfloat calling convention is being used, and the struct may be
8601 // passed in a pair of registers (fp+fp, int+fp), and both registers are
8602 // available, then pass as two separate arguments. If either the GPRs or FPRs
8603 // are exhausted, then pass according to the rule below.
8604 // * If a struct could never be passed in registers or directly in a stack
8605 // slot (as it is larger than 2*XLEN and the floating point rules don't
8606 // apply), then pass it using a pointer with the byval attribute.
8607 // * If a struct is less than 2*XLEN, then coerce to either a two-element
8608 // word-sized array or a 2*XLEN scalar (depending on alignment).
8609 // * The frontend can determine whether a struct is returned by reference or
8610 // not based on its size and fields. If it will be returned by reference, the
8611 // frontend must modify the prototype so a pointer with the sret annotation is
8612 // passed as the first argument. This is not necessary for large scalar
8613 // returns.
8614 // * Struct return values and varargs should be coerced to structs containing
8615 // register-size fields in the same situations they would be for fixed
8616 // arguments.
8617 
8618 static const MCPhysReg ArgGPRs[] = {
8619   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
8620   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
8621 };
8622 static const MCPhysReg ArgFPR16s[] = {
8623   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
8624   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
8625 };
8626 static const MCPhysReg ArgFPR32s[] = {
8627   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
8628   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
8629 };
8630 static const MCPhysReg ArgFPR64s[] = {
8631   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
8632   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
8633 };
8634 // This is an interim calling convention and it may be changed in the future.
8635 static const MCPhysReg ArgVRs[] = {
8636     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
8637     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
8638     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
8639 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
8640                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
8641                                      RISCV::V20M2, RISCV::V22M2};
8642 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
8643                                      RISCV::V20M4};
8644 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
8645 
8646 // Pass a 2*XLEN argument that has been split into two XLEN values through
8647 // registers or the stack as necessary.
8648 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
8649                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
8650                                 MVT ValVT2, MVT LocVT2,
8651                                 ISD::ArgFlagsTy ArgFlags2) {
8652   unsigned XLenInBytes = XLen / 8;
8653   if (Register Reg = State.AllocateReg(ArgGPRs)) {
8654     // At least one half can be passed via register.
8655     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8656                                      VA1.getLocVT(), CCValAssign::Full));
8657   } else {
8658     // Both halves must be passed on the stack, with proper alignment.
8659     Align StackAlign =
8660         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8661     State.addLoc(
8662         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
8663                             State.AllocateStack(XLenInBytes, StackAlign),
8664                             VA1.getLocVT(), CCValAssign::Full));
8665     State.addLoc(CCValAssign::getMem(
8666         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
8667         LocVT2, CCValAssign::Full));
8668     return false;
8669   }
8670 
8671   if (Register Reg = State.AllocateReg(ArgGPRs)) {
8672     // The second half can also be passed via register.
8673     State.addLoc(
8674         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8675   } else {
8676     // The second half is passed via the stack, without additional alignment.
8677     State.addLoc(CCValAssign::getMem(
8678         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
8679         LocVT2, CCValAssign::Full));
8680   }
8681 
8682   return false;
8683 }
8684 
8685 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
8686                                Optional<unsigned> FirstMaskArgument,
8687                                CCState &State, const RISCVTargetLowering &TLI) {
8688   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
8689   if (RC == &RISCV::VRRegClass) {
8690     // Assign the first mask argument to V0.
8691     // This is an interim calling convention and it may be changed in the
8692     // future.
8693     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
8694       return State.AllocateReg(RISCV::V0);
8695     return State.AllocateReg(ArgVRs);
8696   }
8697   if (RC == &RISCV::VRM2RegClass)
8698     return State.AllocateReg(ArgVRM2s);
8699   if (RC == &RISCV::VRM4RegClass)
8700     return State.AllocateReg(ArgVRM4s);
8701   if (RC == &RISCV::VRM8RegClass)
8702     return State.AllocateReg(ArgVRM8s);
8703   llvm_unreachable("Unhandled register class for ValueType");
8704 }
8705 
8706 // Implements the RISC-V calling convention. Returns true upon failure.
8707 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
8708                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
8709                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
8710                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
8711                      Optional<unsigned> FirstMaskArgument) {
8712   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
8713   assert(XLen == 32 || XLen == 64);
8714   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
8715 
8716   // Any return value split in to more than two values can't be returned
8717   // directly. Vectors are returned via the available vector registers.
8718   if (!LocVT.isVector() && IsRet && ValNo > 1)
8719     return true;
8720 
8721   // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
8722   // variadic argument, or if no F16/F32 argument registers are available.
8723   bool UseGPRForF16_F32 = true;
8724   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
8725   // variadic argument, or if no F64 argument registers are available.
8726   bool UseGPRForF64 = true;
8727 
8728   switch (ABI) {
8729   default:
8730     llvm_unreachable("Unexpected ABI");
8731   case RISCVABI::ABI_ILP32:
8732   case RISCVABI::ABI_LP64:
8733     break;
8734   case RISCVABI::ABI_ILP32F:
8735   case RISCVABI::ABI_LP64F:
8736     UseGPRForF16_F32 = !IsFixed;
8737     break;
8738   case RISCVABI::ABI_ILP32D:
8739   case RISCVABI::ABI_LP64D:
8740     UseGPRForF16_F32 = !IsFixed;
8741     UseGPRForF64 = !IsFixed;
8742     break;
8743   }
8744 
8745   // FPR16, FPR32, and FPR64 alias each other.
8746   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
8747     UseGPRForF16_F32 = true;
8748     UseGPRForF64 = true;
8749   }
8750 
8751   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
8752   // similar local variables rather than directly checking against the target
8753   // ABI.
8754 
8755   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
8756     LocVT = XLenVT;
8757     LocInfo = CCValAssign::BCvt;
8758   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
8759     LocVT = MVT::i64;
8760     LocInfo = CCValAssign::BCvt;
8761   }
8762 
8763   // If this is a variadic argument, the RISC-V calling convention requires
8764   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
8765   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
8766   // be used regardless of whether the original argument was split during
8767   // legalisation or not. The argument will not be passed by registers if the
8768   // original type is larger than 2*XLEN, so the register alignment rule does
8769   // not apply.
8770   unsigned TwoXLenInBytes = (2 * XLen) / 8;
8771   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
8772       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
8773     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8774     // Skip 'odd' register if necessary.
8775     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
8776       State.AllocateReg(ArgGPRs);
8777   }
8778 
8779   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8780   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8781       State.getPendingArgFlags();
8782 
8783   assert(PendingLocs.size() == PendingArgFlags.size() &&
8784          "PendingLocs and PendingArgFlags out of sync");
8785 
8786   // Handle passing f64 on RV32D with a soft float ABI or when floating point
8787   // registers are exhausted.
8788   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
8789     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
8790            "Can't lower f64 if it is split");
8791     // Depending on available argument GPRS, f64 may be passed in a pair of
8792     // GPRs, split between a GPR and the stack, or passed completely on the
8793     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8794     // cases.
8795     Register Reg = State.AllocateReg(ArgGPRs);
8796     LocVT = MVT::i32;
8797     if (!Reg) {
8798       unsigned StackOffset = State.AllocateStack(8, Align(8));
8799       State.addLoc(
8800           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8801       return false;
8802     }
8803     if (!State.AllocateReg(ArgGPRs))
8804       State.AllocateStack(4, Align(4));
8805     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8806     return false;
8807   }
8808 
8809   // Fixed-length vectors are located in the corresponding scalable-vector
8810   // container types.
8811   if (ValVT.isFixedLengthVector())
8812     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
8813 
8814   // Split arguments might be passed indirectly, so keep track of the pending
8815   // values. Split vectors are passed via a mix of registers and indirectly, so
8816   // treat them as we would any other argument.
8817   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8818     LocVT = XLenVT;
8819     LocInfo = CCValAssign::Indirect;
8820     PendingLocs.push_back(
8821         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
8822     PendingArgFlags.push_back(ArgFlags);
8823     if (!ArgFlags.isSplitEnd()) {
8824       return false;
8825     }
8826   }
8827 
8828   // If the split argument only had two elements, it should be passed directly
8829   // in registers or on the stack.
8830   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8831       PendingLocs.size() <= 2) {
8832     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8833     // Apply the normal calling convention rules to the first half of the
8834     // split argument.
8835     CCValAssign VA = PendingLocs[0];
8836     ISD::ArgFlagsTy AF = PendingArgFlags[0];
8837     PendingLocs.clear();
8838     PendingArgFlags.clear();
8839     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
8840                                ArgFlags);
8841   }
8842 
8843   // Allocate to a register if possible, or else a stack slot.
8844   Register Reg;
8845   unsigned StoreSizeBytes = XLen / 8;
8846   Align StackAlign = Align(XLen / 8);
8847 
8848   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
8849     Reg = State.AllocateReg(ArgFPR16s);
8850   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
8851     Reg = State.AllocateReg(ArgFPR32s);
8852   else if (ValVT == MVT::f64 && !UseGPRForF64)
8853     Reg = State.AllocateReg(ArgFPR64s);
8854   else if (ValVT.isVector()) {
8855     Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
8856     if (!Reg) {
8857       // For return values, the vector must be passed fully via registers or
8858       // via the stack.
8859       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
8860       // but we're using all of them.
8861       if (IsRet)
8862         return true;
8863       // Try using a GPR to pass the address
8864       if ((Reg = State.AllocateReg(ArgGPRs))) {
8865         LocVT = XLenVT;
8866         LocInfo = CCValAssign::Indirect;
8867       } else if (ValVT.isScalableVector()) {
8868         LocVT = XLenVT;
8869         LocInfo = CCValAssign::Indirect;
8870       } else {
8871         // Pass fixed-length vectors on the stack.
8872         LocVT = ValVT;
8873         StoreSizeBytes = ValVT.getStoreSize();
8874         // Align vectors to their element sizes, being careful for vXi1
8875         // vectors.
8876         StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
8877       }
8878     }
8879   } else {
8880     Reg = State.AllocateReg(ArgGPRs);
8881   }
8882 
8883   unsigned StackOffset =
8884       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8885 
8886   // If we reach this point and PendingLocs is non-empty, we must be at the
8887   // end of a split argument that must be passed indirectly.
8888   if (!PendingLocs.empty()) {
8889     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8890     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8891 
8892     for (auto &It : PendingLocs) {
8893       if (Reg)
8894         It.convertToReg(Reg);
8895       else
8896         It.convertToMem(StackOffset);
8897       State.addLoc(It);
8898     }
8899     PendingLocs.clear();
8900     PendingArgFlags.clear();
8901     return false;
8902   }
8903 
8904   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
8905           (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
8906          "Expected an XLenVT or vector types at this stage");
8907 
8908   if (Reg) {
8909     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8910     return false;
8911   }
8912 
8913   // When a floating-point value is passed on the stack, no bit-conversion is
8914   // needed.
8915   if (ValVT.isFloatingPoint()) {
8916     LocVT = ValVT;
8917     LocInfo = CCValAssign::Full;
8918   }
8919   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8920   return false;
8921 }
8922 
8923 template <typename ArgTy>
8924 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
8925   for (const auto &ArgIdx : enumerate(Args)) {
8926     MVT ArgVT = ArgIdx.value().VT;
8927     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
8928       return ArgIdx.index();
8929   }
8930   return None;
8931 }
8932 
8933 void RISCVTargetLowering::analyzeInputArgs(
8934     MachineFunction &MF, CCState &CCInfo,
8935     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8936     RISCVCCAssignFn Fn) const {
8937   unsigned NumArgs = Ins.size();
8938   FunctionType *FType = MF.getFunction().getFunctionType();
8939 
8940   Optional<unsigned> FirstMaskArgument;
8941   if (Subtarget.hasVInstructions())
8942     FirstMaskArgument = preAssignMask(Ins);
8943 
8944   for (unsigned i = 0; i != NumArgs; ++i) {
8945     MVT ArgVT = Ins[i].VT;
8946     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
8947 
8948     Type *ArgTy = nullptr;
8949     if (IsRet)
8950       ArgTy = FType->getReturnType();
8951     else if (Ins[i].isOrigArg())
8952       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8953 
8954     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8955     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8956            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
8957            FirstMaskArgument)) {
8958       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
8959                         << EVT(ArgVT).getEVTString() << '\n');
8960       llvm_unreachable(nullptr);
8961     }
8962   }
8963 }
8964 
8965 void RISCVTargetLowering::analyzeOutputArgs(
8966     MachineFunction &MF, CCState &CCInfo,
8967     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8968     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
8969   unsigned NumArgs = Outs.size();
8970 
8971   Optional<unsigned> FirstMaskArgument;
8972   if (Subtarget.hasVInstructions())
8973     FirstMaskArgument = preAssignMask(Outs);
8974 
8975   for (unsigned i = 0; i != NumArgs; i++) {
8976     MVT ArgVT = Outs[i].VT;
8977     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
8978     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8979 
8980     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8981     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8982            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
8983            FirstMaskArgument)) {
8984       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
8985                         << EVT(ArgVT).getEVTString() << "\n");
8986       llvm_unreachable(nullptr);
8987     }
8988   }
8989 }
8990 
8991 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8992 // values.
8993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8994                                    const CCValAssign &VA, const SDLoc &DL,
8995                                    const RISCVSubtarget &Subtarget) {
8996   switch (VA.getLocInfo()) {
8997   default:
8998     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8999   case CCValAssign::Full:
9000     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
9001       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
9002     break;
9003   case CCValAssign::BCvt:
9004     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
9005       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
9006     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9007       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
9008     else
9009       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9010     break;
9011   }
9012   return Val;
9013 }
9014 
9015 // The caller is responsible for loading the full value if the argument is
9016 // passed with CCValAssign::Indirect.
9017 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
9018                                 const CCValAssign &VA, const SDLoc &DL,
9019                                 const RISCVTargetLowering &TLI) {
9020   MachineFunction &MF = DAG.getMachineFunction();
9021   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9022   EVT LocVT = VA.getLocVT();
9023   SDValue Val;
9024   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9025   Register VReg = RegInfo.createVirtualRegister(RC);
9026   RegInfo.addLiveIn(VA.getLocReg(), VReg);
9027   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9028 
9029   if (VA.getLocInfo() == CCValAssign::Indirect)
9030     return Val;
9031 
9032   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
9033 }
9034 
9035 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
9036                                    const CCValAssign &VA, const SDLoc &DL,
9037                                    const RISCVSubtarget &Subtarget) {
9038   EVT LocVT = VA.getLocVT();
9039 
9040   switch (VA.getLocInfo()) {
9041   default:
9042     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9043   case CCValAssign::Full:
9044     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
9045       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
9046     break;
9047   case CCValAssign::BCvt:
9048     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
9049       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
9050     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9051       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
9052     else
9053       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9054     break;
9055   }
9056   return Val;
9057 }
9058 
9059 // The caller is responsible for loading the full value if the argument is
9060 // passed with CCValAssign::Indirect.
9061 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
9062                                 const CCValAssign &VA, const SDLoc &DL) {
9063   MachineFunction &MF = DAG.getMachineFunction();
9064   MachineFrameInfo &MFI = MF.getFrameInfo();
9065   EVT LocVT = VA.getLocVT();
9066   EVT ValVT = VA.getValVT();
9067   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
9068   if (ValVT.isScalableVector()) {
9069     // When the value is a scalable vector, we save the pointer which points to
9070     // the scalable vector value in the stack. The ValVT will be the pointer
9071     // type, instead of the scalable vector type.
9072     ValVT = LocVT;
9073   }
9074   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9075                                  /*IsImmutable=*/true);
9076   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
9077   SDValue Val;
9078 
9079   ISD::LoadExtType ExtType;
9080   switch (VA.getLocInfo()) {
9081   default:
9082     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9083   case CCValAssign::Full:
9084   case CCValAssign::Indirect:
9085   case CCValAssign::BCvt:
9086     ExtType = ISD::NON_EXTLOAD;
9087     break;
9088   }
9089   Val = DAG.getExtLoad(
9090       ExtType, DL, LocVT, Chain, FIN,
9091       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
9092   return Val;
9093 }
9094 
9095 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
9096                                        const CCValAssign &VA, const SDLoc &DL) {
9097   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9098          "Unexpected VA");
9099   MachineFunction &MF = DAG.getMachineFunction();
9100   MachineFrameInfo &MFI = MF.getFrameInfo();
9101   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9102 
9103   if (VA.isMemLoc()) {
9104     // f64 is passed on the stack.
9105     int FI =
9106         MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
9107     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9108     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
9109                        MachinePointerInfo::getFixedStack(MF, FI));
9110   }
9111 
9112   assert(VA.isRegLoc() && "Expected register VA assignment");
9113 
9114   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9115   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9116   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9117   SDValue Hi;
9118   if (VA.getLocReg() == RISCV::X17) {
9119     // Second half of f64 is passed on the stack.
9120     int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
9121     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9122     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9123                      MachinePointerInfo::getFixedStack(MF, FI));
9124   } else {
9125     // Second half of f64 is passed in another GPR.
9126     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9127     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
9128     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9129   }
9130   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
9131 }
9132 
9133 // FastCC has less than 1% performance improvement for some particular
9134 // benchmark. But theoretically, it may has benenfit for some cases.
9135 static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
9136                             unsigned ValNo, MVT ValVT, MVT LocVT,
9137                             CCValAssign::LocInfo LocInfo,
9138                             ISD::ArgFlagsTy ArgFlags, CCState &State,
9139                             bool IsFixed, bool IsRet, Type *OrigTy,
9140                             const RISCVTargetLowering &TLI,
9141                             Optional<unsigned> FirstMaskArgument) {
9142 
9143   // X5 and X6 might be used for save-restore libcall.
9144   static const MCPhysReg GPRList[] = {
9145       RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
9146       RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
9147       RISCV::X29, RISCV::X30, RISCV::X31};
9148 
9149   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9150     if (unsigned Reg = State.AllocateReg(GPRList)) {
9151       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9152       return false;
9153     }
9154   }
9155 
9156   if (LocVT == MVT::f16) {
9157     static const MCPhysReg FPR16List[] = {
9158         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
9159         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
9160         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
9161         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
9162     if (unsigned Reg = State.AllocateReg(FPR16List)) {
9163       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9164       return false;
9165     }
9166   }
9167 
9168   if (LocVT == MVT::f32) {
9169     static const MCPhysReg FPR32List[] = {
9170         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
9171         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
9172         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
9173         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
9174     if (unsigned Reg = State.AllocateReg(FPR32List)) {
9175       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9176       return false;
9177     }
9178   }
9179 
9180   if (LocVT == MVT::f64) {
9181     static const MCPhysReg FPR64List[] = {
9182         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
9183         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
9184         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
9185         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
9186     if (unsigned Reg = State.AllocateReg(FPR64List)) {
9187       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9188       return false;
9189     }
9190   }
9191 
9192   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
9193     unsigned Offset4 = State.AllocateStack(4, Align(4));
9194     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
9195     return false;
9196   }
9197 
9198   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
9199     unsigned Offset5 = State.AllocateStack(8, Align(8));
9200     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
9201     return false;
9202   }
9203 
9204   if (LocVT.isVector()) {
9205     if (unsigned Reg =
9206             allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
9207       // Fixed-length vectors are located in the corresponding scalable-vector
9208       // container types.
9209       if (ValVT.isFixedLengthVector())
9210         LocVT = TLI.getContainerForFixedLengthVector(LocVT);
9211       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9212     } else {
9213       // Try and pass the address via a "fast" GPR.
9214       if (unsigned GPRReg = State.AllocateReg(GPRList)) {
9215         LocInfo = CCValAssign::Indirect;
9216         LocVT = TLI.getSubtarget().getXLenVT();
9217         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
9218       } else if (ValVT.isFixedLengthVector()) {
9219         auto StackAlign =
9220             MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
9221         unsigned StackOffset =
9222             State.AllocateStack(ValVT.getStoreSize(), StackAlign);
9223         State.addLoc(
9224             CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9225       } else {
9226         // Can't pass scalable vectors on the stack.
9227         return true;
9228       }
9229     }
9230 
9231     return false;
9232   }
9233 
9234   return true; // CC didn't match.
9235 }
9236 
9237 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9238                          CCValAssign::LocInfo LocInfo,
9239                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
9240 
9241   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9242     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
9243     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
9244     static const MCPhysReg GPRList[] = {
9245         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
9246         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
9247     if (unsigned Reg = State.AllocateReg(GPRList)) {
9248       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9249       return false;
9250     }
9251   }
9252 
9253   if (LocVT == MVT::f32) {
9254     // Pass in STG registers: F1, ..., F6
9255     //                        fs0 ... fs5
9256     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
9257                                           RISCV::F18_F, RISCV::F19_F,
9258                                           RISCV::F20_F, RISCV::F21_F};
9259     if (unsigned Reg = State.AllocateReg(FPR32List)) {
9260       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9261       return false;
9262     }
9263   }
9264 
9265   if (LocVT == MVT::f64) {
9266     // Pass in STG registers: D1, ..., D6
9267     //                        fs6 ... fs11
9268     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
9269                                           RISCV::F24_D, RISCV::F25_D,
9270                                           RISCV::F26_D, RISCV::F27_D};
9271     if (unsigned Reg = State.AllocateReg(FPR64List)) {
9272       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9273       return false;
9274     }
9275   }
9276 
9277   report_fatal_error("No registers left in GHC calling convention");
9278   return true;
9279 }
9280 
9281 // Transform physical registers into virtual registers.
9282 SDValue RISCVTargetLowering::LowerFormalArguments(
9283     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9284     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9285     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9286 
9287   MachineFunction &MF = DAG.getMachineFunction();
9288 
9289   switch (CallConv) {
9290   default:
9291     report_fatal_error("Unsupported calling convention");
9292   case CallingConv::C:
9293   case CallingConv::Fast:
9294     break;
9295   case CallingConv::GHC:
9296     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
9297         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
9298       report_fatal_error(
9299         "GHC calling convention requires the F and D instruction set extensions");
9300   }
9301 
9302   const Function &Func = MF.getFunction();
9303   if (Func.hasFnAttribute("interrupt")) {
9304     if (!Func.arg_empty())
9305       report_fatal_error(
9306         "Functions with the interrupt attribute cannot have arguments!");
9307 
9308     StringRef Kind =
9309       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
9310 
9311     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
9312       report_fatal_error(
9313         "Function interrupt attribute argument not supported!");
9314   }
9315 
9316   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9317   MVT XLenVT = Subtarget.getXLenVT();
9318   unsigned XLenInBytes = Subtarget.getXLen() / 8;
9319   // Used with vargs to acumulate store chains.
9320   std::vector<SDValue> OutChains;
9321 
9322   // Assign locations to all of the incoming arguments.
9323   SmallVector<CCValAssign, 16> ArgLocs;
9324   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9325 
9326   if (CallConv == CallingConv::GHC)
9327     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
9328   else
9329     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
9330                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
9331                                                    : CC_RISCV);
9332 
9333   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
9334     CCValAssign &VA = ArgLocs[i];
9335     SDValue ArgValue;
9336     // Passing f64 on RV32D with a soft float ABI must be handled as a special
9337     // case.
9338     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
9339       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
9340     else if (VA.isRegLoc())
9341       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
9342     else
9343       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9344 
9345     if (VA.getLocInfo() == CCValAssign::Indirect) {
9346       // If the original argument was split and passed by reference (e.g. i128
9347       // on RV32), we need to load all parts of it here (using the same
9348       // address). Vectors may be partly split to registers and partly to the
9349       // stack, in which case the base address is partly offset and subsequent
9350       // stores are relative to that.
9351       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9352                                    MachinePointerInfo()));
9353       unsigned ArgIndex = Ins[i].OrigArgIndex;
9354       unsigned ArgPartOffset = Ins[i].PartOffset;
9355       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
9356       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
9357         CCValAssign &PartVA = ArgLocs[i + 1];
9358         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
9359         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9360         if (PartVA.getValVT().isScalableVector())
9361           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
9362         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9363         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9364                                      MachinePointerInfo()));
9365         ++i;
9366       }
9367       continue;
9368     }
9369     InVals.push_back(ArgValue);
9370   }
9371 
9372   if (IsVarArg) {
9373     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
9374     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9375     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
9376     MachineFrameInfo &MFI = MF.getFrameInfo();
9377     MachineRegisterInfo &RegInfo = MF.getRegInfo();
9378     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
9379 
9380     // Offset of the first variable argument from stack pointer, and size of
9381     // the vararg save area. For now, the varargs save area is either zero or
9382     // large enough to hold a0-a7.
9383     int VaArgOffset, VarArgsSaveSize;
9384 
9385     // If all registers are allocated, then all varargs must be passed on the
9386     // stack and we don't need to save any argregs.
9387     if (ArgRegs.size() == Idx) {
9388       VaArgOffset = CCInfo.getNextStackOffset();
9389       VarArgsSaveSize = 0;
9390     } else {
9391       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
9392       VaArgOffset = -VarArgsSaveSize;
9393     }
9394 
9395     // Record the frame index of the first variable argument
9396     // which is a value necessary to VASTART.
9397     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
9398     RVFI->setVarArgsFrameIndex(FI);
9399 
9400     // If saving an odd number of registers then create an extra stack slot to
9401     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
9402     // offsets to even-numbered registered remain 2*XLEN-aligned.
9403     if (Idx % 2) {
9404       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
9405       VarArgsSaveSize += XLenInBytes;
9406     }
9407 
9408     // Copy the integer registers that may have been used for passing varargs
9409     // to the vararg save area.
9410     for (unsigned I = Idx; I < ArgRegs.size();
9411          ++I, VaArgOffset += XLenInBytes) {
9412       const Register Reg = RegInfo.createVirtualRegister(RC);
9413       RegInfo.addLiveIn(ArgRegs[I], Reg);
9414       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
9415       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
9416       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9417       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9418                                    MachinePointerInfo::getFixedStack(MF, FI));
9419       cast<StoreSDNode>(Store.getNode())
9420           ->getMemOperand()
9421           ->setValue((Value *)nullptr);
9422       OutChains.push_back(Store);
9423     }
9424     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
9425   }
9426 
9427   // All stores are grouped in one node to allow the matching between
9428   // the size of Ins and InVals. This only happens for vararg functions.
9429   if (!OutChains.empty()) {
9430     OutChains.push_back(Chain);
9431     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9432   }
9433 
9434   return Chain;
9435 }
9436 
9437 /// isEligibleForTailCallOptimization - Check whether the call is eligible
9438 /// for tail call optimization.
9439 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
9440 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
9441     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9442     const SmallVector<CCValAssign, 16> &ArgLocs) const {
9443 
9444   auto &Callee = CLI.Callee;
9445   auto CalleeCC = CLI.CallConv;
9446   auto &Outs = CLI.Outs;
9447   auto &Caller = MF.getFunction();
9448   auto CallerCC = Caller.getCallingConv();
9449 
9450   // Exception-handling functions need a special set of instructions to
9451   // indicate a return to the hardware. Tail-calling another function would
9452   // probably break this.
9453   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
9454   // should be expanded as new function attributes are introduced.
9455   if (Caller.hasFnAttribute("interrupt"))
9456     return false;
9457 
9458   // Do not tail call opt if the stack is used to pass parameters.
9459   if (CCInfo.getNextStackOffset() != 0)
9460     return false;
9461 
9462   // Do not tail call opt if any parameters need to be passed indirectly.
9463   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
9464   // passed indirectly. So the address of the value will be passed in a
9465   // register, or if not available, then the address is put on the stack. In
9466   // order to pass indirectly, space on the stack often needs to be allocated
9467   // in order to store the value. In this case the CCInfo.getNextStackOffset()
9468   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
9469   // are passed CCValAssign::Indirect.
9470   for (auto &VA : ArgLocs)
9471     if (VA.getLocInfo() == CCValAssign::Indirect)
9472       return false;
9473 
9474   // Do not tail call opt if either caller or callee uses struct return
9475   // semantics.
9476   auto IsCallerStructRet = Caller.hasStructRetAttr();
9477   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9478   if (IsCallerStructRet || IsCalleeStructRet)
9479     return false;
9480 
9481   // Externally-defined functions with weak linkage should not be
9482   // tail-called. The behaviour of branch instructions in this situation (as
9483   // used for tail calls) is implementation-defined, so we cannot rely on the
9484   // linker replacing the tail call with a return.
9485   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
9486     const GlobalValue *GV = G->getGlobal();
9487     if (GV->hasExternalWeakLinkage())
9488       return false;
9489   }
9490 
9491   // The callee has to preserve all registers the caller needs to preserve.
9492   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9493   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9494   if (CalleeCC != CallerCC) {
9495     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9496     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9497       return false;
9498   }
9499 
9500   // Byval parameters hand the function a pointer directly into the stack area
9501   // we want to reuse during a tail call. Working around this *is* possible
9502   // but less efficient and uglier in LowerCall.
9503   for (auto &Arg : Outs)
9504     if (Arg.Flags.isByVal())
9505       return false;
9506 
9507   return true;
9508 }
9509 
9510 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
9511   return DAG.getDataLayout().getPrefTypeAlign(
9512       VT.getTypeForEVT(*DAG.getContext()));
9513 }
9514 
9515 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9516 // and output parameter nodes.
9517 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
9518                                        SmallVectorImpl<SDValue> &InVals) const {
9519   SelectionDAG &DAG = CLI.DAG;
9520   SDLoc &DL = CLI.DL;
9521   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
9522   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9523   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
9524   SDValue Chain = CLI.Chain;
9525   SDValue Callee = CLI.Callee;
9526   bool &IsTailCall = CLI.IsTailCall;
9527   CallingConv::ID CallConv = CLI.CallConv;
9528   bool IsVarArg = CLI.IsVarArg;
9529   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9530   MVT XLenVT = Subtarget.getXLenVT();
9531 
9532   MachineFunction &MF = DAG.getMachineFunction();
9533 
9534   // Analyze the operands of the call, assigning locations to each operand.
9535   SmallVector<CCValAssign, 16> ArgLocs;
9536   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9537 
9538   if (CallConv == CallingConv::GHC)
9539     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
9540   else
9541     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
9542                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
9543                                                     : CC_RISCV);
9544 
9545   // Check if it's really possible to do a tail call.
9546   if (IsTailCall)
9547     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9548 
9549   if (IsTailCall)
9550     ++NumTailCalls;
9551   else if (CLI.CB && CLI.CB->isMustTailCall())
9552     report_fatal_error("failed to perform tail call elimination on a call "
9553                        "site marked musttail");
9554 
9555   // Get a count of how many bytes are to be pushed on the stack.
9556   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
9557 
9558   // Create local copies for byval args
9559   SmallVector<SDValue, 8> ByValArgs;
9560   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9561     ISD::ArgFlagsTy Flags = Outs[i].Flags;
9562     if (!Flags.isByVal())
9563       continue;
9564 
9565     SDValue Arg = OutVals[i];
9566     unsigned Size = Flags.getByValSize();
9567     Align Alignment = Flags.getNonZeroByValAlign();
9568 
9569     int FI =
9570         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9571     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9572     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
9573 
9574     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9575                           /*IsVolatile=*/false,
9576                           /*AlwaysInline=*/false, IsTailCall,
9577                           MachinePointerInfo(), MachinePointerInfo());
9578     ByValArgs.push_back(FIPtr);
9579   }
9580 
9581   if (!IsTailCall)
9582     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9583 
9584   // Copy argument values to their designated locations.
9585   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
9586   SmallVector<SDValue, 8> MemOpChains;
9587   SDValue StackPtr;
9588   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
9589     CCValAssign &VA = ArgLocs[i];
9590     SDValue ArgValue = OutVals[i];
9591     ISD::ArgFlagsTy Flags = Outs[i].Flags;
9592 
9593     // Handle passing f64 on RV32D with a soft float ABI as a special case.
9594     bool IsF64OnRV32DSoftABI =
9595         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
9596     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
9597       SDValue SplitF64 = DAG.getNode(
9598           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9599       SDValue Lo = SplitF64.getValue(0);
9600       SDValue Hi = SplitF64.getValue(1);
9601 
9602       Register RegLo = VA.getLocReg();
9603       RegsToPass.push_back(std::make_pair(RegLo, Lo));
9604 
9605       if (RegLo == RISCV::X17) {
9606         // Second half of f64 is passed on the stack.
9607         // Work out the address of the stack slot.
9608         if (!StackPtr.getNode())
9609           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
9610         // Emit the store.
9611         MemOpChains.push_back(
9612             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
9613       } else {
9614         // Second half of f64 is passed in another GPR.
9615         assert(RegLo < RISCV::X31 && "Invalid register pair");
9616         Register RegHigh = RegLo + 1;
9617         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9618       }
9619       continue;
9620     }
9621 
9622     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
9623     // as any other MemLoc.
9624 
9625     // Promote the value if needed.
9626     // For now, only handle fully promoted and indirect arguments.
9627     if (VA.getLocInfo() == CCValAssign::Indirect) {
9628       // Store the argument in a stack slot and pass its address.
9629       Align StackAlign =
9630           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
9631                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
9632       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9633       // If the original argument was split (e.g. i128), we need
9634       // to store the required parts of it here (and pass just one address).
9635       // Vectors may be partly split to registers and partly to the stack, in
9636       // which case the base address is partly offset and subsequent stores are
9637       // relative to that.
9638       unsigned ArgIndex = Outs[i].OrigArgIndex;
9639       unsigned ArgPartOffset = Outs[i].PartOffset;
9640       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
9641       // Calculate the total size to store. We don't have access to what we're
9642       // actually storing other than performing the loop and collecting the
9643       // info.
9644       SmallVector<std::pair<SDValue, SDValue>> Parts;
9645       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
9646         SDValue PartValue = OutVals[i + 1];
9647         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
9648         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9649         EVT PartVT = PartValue.getValueType();
9650         if (PartVT.isScalableVector())
9651           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
9652         StoredSize += PartVT.getStoreSize();
9653         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
9654         Parts.push_back(std::make_pair(PartValue, Offset));
9655         ++i;
9656       }
9657       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
9658       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
9659       MemOpChains.push_back(
9660           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
9661                        MachinePointerInfo::getFixedStack(MF, FI)));
9662       for (const auto &Part : Parts) {
9663         SDValue PartValue = Part.first;
9664         SDValue PartOffset = Part.second;
9665         SDValue Address =
9666             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
9667         MemOpChains.push_back(
9668             DAG.getStore(Chain, DL, PartValue, Address,
9669                          MachinePointerInfo::getFixedStack(MF, FI)));
9670       }
9671       ArgValue = SpillSlot;
9672     } else {
9673       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
9674     }
9675 
9676     // Use local copy if it is a byval arg.
9677     if (Flags.isByVal())
9678       ArgValue = ByValArgs[j++];
9679 
9680     if (VA.isRegLoc()) {
9681       // Queue up the argument copies and emit them at the end.
9682       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
9683     } else {
9684       assert(VA.isMemLoc() && "Argument not register or memory");
9685       assert(!IsTailCall && "Tail call not allowed if stack is used "
9686                             "for passing parameters");
9687 
9688       // Work out the address of the stack slot.
9689       if (!StackPtr.getNode())
9690         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
9691       SDValue Address =
9692           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9693                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
9694 
9695       // Emit the store.
9696       MemOpChains.push_back(
9697           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
9698     }
9699   }
9700 
9701   // Join the stores, which are independent of one another.
9702   if (!MemOpChains.empty())
9703     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
9704 
9705   SDValue Glue;
9706 
9707   // Build a sequence of copy-to-reg nodes, chained and glued together.
9708   for (auto &Reg : RegsToPass) {
9709     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
9710     Glue = Chain.getValue(1);
9711   }
9712 
9713   // Validate that none of the argument registers have been marked as
9714   // reserved, if so report an error. Do the same for the return address if this
9715   // is not a tailcall.
9716   validateCCReservedRegs(RegsToPass, MF);
9717   if (!IsTailCall &&
9718       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
9719     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
9720         MF.getFunction(),
9721         "Return address register required, but has been reserved."});
9722 
9723   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
9724   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
9725   // split it and then direct call can be matched by PseudoCALL.
9726   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
9727     const GlobalValue *GV = S->getGlobal();
9728 
9729     unsigned OpFlags = RISCVII::MO_CALL;
9730     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
9731       OpFlags = RISCVII::MO_PLT;
9732 
9733     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
9734   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9735     unsigned OpFlags = RISCVII::MO_CALL;
9736 
9737     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
9738                                                  nullptr))
9739       OpFlags = RISCVII::MO_PLT;
9740 
9741     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
9742   }
9743 
9744   // The first call operand is the chain and the second is the target address.
9745   SmallVector<SDValue, 8> Ops;
9746   Ops.push_back(Chain);
9747   Ops.push_back(Callee);
9748 
9749   // Add argument registers to the end of the list so that they are
9750   // known live into the call.
9751   for (auto &Reg : RegsToPass)
9752     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
9753 
9754   if (!IsTailCall) {
9755     // Add a register mask operand representing the call-preserved registers.
9756     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
9757     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
9758     assert(Mask && "Missing call preserved mask for calling convention");
9759     Ops.push_back(DAG.getRegisterMask(Mask));
9760   }
9761 
9762   // Glue the call to the argument copies, if any.
9763   if (Glue.getNode())
9764     Ops.push_back(Glue);
9765 
9766   // Emit the call.
9767   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9768 
9769   if (IsTailCall) {
9770     MF.getFrameInfo().setHasTailCall();
9771     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
9772   }
9773 
9774   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
9775   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
9776   Glue = Chain.getValue(1);
9777 
9778   // Mark the end of the call, which is glued to the call itself.
9779   Chain = DAG.getCALLSEQ_END(Chain,
9780                              DAG.getConstant(NumBytes, DL, PtrVT, true),
9781                              DAG.getConstant(0, DL, PtrVT, true),
9782                              Glue, DL);
9783   Glue = Chain.getValue(1);
9784 
9785   // Assign locations to each value returned by this call.
9786   SmallVector<CCValAssign, 16> RVLocs;
9787   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
9788   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
9789 
9790   // Copy all of the result registers out of their specified physreg.
9791   for (auto &VA : RVLocs) {
9792     // Copy the value out
9793     SDValue RetValue =
9794         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
9795     // Glue the RetValue to the end of the call sequence
9796     Chain = RetValue.getValue(1);
9797     Glue = RetValue.getValue(2);
9798 
9799     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9800       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
9801       SDValue RetValue2 =
9802           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
9803       Chain = RetValue2.getValue(1);
9804       Glue = RetValue2.getValue(2);
9805       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
9806                              RetValue2);
9807     }
9808 
9809     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
9810 
9811     InVals.push_back(RetValue);
9812   }
9813 
9814   return Chain;
9815 }
9816 
9817 bool RISCVTargetLowering::CanLowerReturn(
9818     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9819     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
9820   SmallVector<CCValAssign, 16> RVLocs;
9821   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9822 
9823   Optional<unsigned> FirstMaskArgument;
9824   if (Subtarget.hasVInstructions())
9825     FirstMaskArgument = preAssignMask(Outs);
9826 
9827   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9828     MVT VT = Outs[i].VT;
9829     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
9830     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
9831     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
9832                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
9833                  *this, FirstMaskArgument))
9834       return false;
9835   }
9836   return true;
9837 }
9838 
9839 SDValue
9840 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
9841                                  bool IsVarArg,
9842                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
9843                                  const SmallVectorImpl<SDValue> &OutVals,
9844                                  const SDLoc &DL, SelectionDAG &DAG) const {
9845   const MachineFunction &MF = DAG.getMachineFunction();
9846   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
9847 
9848   // Stores the assignment of the return value to a location.
9849   SmallVector<CCValAssign, 16> RVLocs;
9850 
9851   // Info about the registers and stack slot.
9852   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
9853                  *DAG.getContext());
9854 
9855   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
9856                     nullptr, CC_RISCV);
9857 
9858   if (CallConv == CallingConv::GHC && !RVLocs.empty())
9859     report_fatal_error("GHC functions return void only");
9860 
9861   SDValue Glue;
9862   SmallVector<SDValue, 4> RetOps(1, Chain);
9863 
9864   // Copy the result values into the output registers.
9865   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
9866     SDValue Val = OutVals[i];
9867     CCValAssign &VA = RVLocs[i];
9868     assert(VA.isRegLoc() && "Can only return in registers!");
9869 
9870     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9871       // Handle returning f64 on RV32D with a soft float ABI.
9872       assert(VA.isRegLoc() && "Expected return via registers");
9873       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
9874                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
9875       SDValue Lo = SplitF64.getValue(0);
9876       SDValue Hi = SplitF64.getValue(1);
9877       Register RegLo = VA.getLocReg();
9878       assert(RegLo < RISCV::X31 && "Invalid register pair");
9879       Register RegHi = RegLo + 1;
9880 
9881       if (STI.isRegisterReservedByUser(RegLo) ||
9882           STI.isRegisterReservedByUser(RegHi))
9883         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
9884             MF.getFunction(),
9885             "Return value register required, but has been reserved."});
9886 
9887       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
9888       Glue = Chain.getValue(1);
9889       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
9890       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
9891       Glue = Chain.getValue(1);
9892       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
9893     } else {
9894       // Handle a 'normal' return.
9895       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
9896       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
9897 
9898       if (STI.isRegisterReservedByUser(VA.getLocReg()))
9899         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
9900             MF.getFunction(),
9901             "Return value register required, but has been reserved."});
9902 
9903       // Guarantee that all emitted copies are stuck together.
9904       Glue = Chain.getValue(1);
9905       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
9906     }
9907   }
9908 
9909   RetOps[0] = Chain; // Update chain.
9910 
9911   // Add the glue node if we have it.
9912   if (Glue.getNode()) {
9913     RetOps.push_back(Glue);
9914   }
9915 
9916   unsigned RetOpc = RISCVISD::RET_FLAG;
9917   // Interrupt service routines use different return instructions.
9918   const Function &Func = DAG.getMachineFunction().getFunction();
9919   if (Func.hasFnAttribute("interrupt")) {
9920     if (!Func.getReturnType()->isVoidTy())
9921       report_fatal_error(
9922           "Functions with the interrupt attribute must have void return type!");
9923 
9924     MachineFunction &MF = DAG.getMachineFunction();
9925     StringRef Kind =
9926       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
9927 
9928     if (Kind == "user")
9929       RetOpc = RISCVISD::URET_FLAG;
9930     else if (Kind == "supervisor")
9931       RetOpc = RISCVISD::SRET_FLAG;
9932     else
9933       RetOpc = RISCVISD::MRET_FLAG;
9934   }
9935 
9936   return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
9937 }
9938 
9939 void RISCVTargetLowering::validateCCReservedRegs(
9940     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
9941     MachineFunction &MF) const {
9942   const Function &F = MF.getFunction();
9943   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
9944 
9945   if (llvm::any_of(Regs, [&STI](auto Reg) {
9946         return STI.isRegisterReservedByUser(Reg.first);
9947       }))
9948     F.getContext().diagnose(DiagnosticInfoUnsupported{
9949         F, "Argument register required, but has been reserved."});
9950 }
9951 
9952 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
9953   return CI->isTailCall();
9954 }
9955 
9956 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
9957 #define NODE_NAME_CASE(NODE)                                                   \
9958   case RISCVISD::NODE:                                                         \
9959     return "RISCVISD::" #NODE;
9960   // clang-format off
9961   switch ((RISCVISD::NodeType)Opcode) {
9962   case RISCVISD::FIRST_NUMBER:
9963     break;
9964   NODE_NAME_CASE(RET_FLAG)
9965   NODE_NAME_CASE(URET_FLAG)
9966   NODE_NAME_CASE(SRET_FLAG)
9967   NODE_NAME_CASE(MRET_FLAG)
9968   NODE_NAME_CASE(CALL)
9969   NODE_NAME_CASE(SELECT_CC)
9970   NODE_NAME_CASE(BR_CC)
9971   NODE_NAME_CASE(BuildPairF64)
9972   NODE_NAME_CASE(SplitF64)
9973   NODE_NAME_CASE(TAIL)
9974   NODE_NAME_CASE(MULHSU)
9975   NODE_NAME_CASE(SLLW)
9976   NODE_NAME_CASE(SRAW)
9977   NODE_NAME_CASE(SRLW)
9978   NODE_NAME_CASE(DIVW)
9979   NODE_NAME_CASE(DIVUW)
9980   NODE_NAME_CASE(REMUW)
9981   NODE_NAME_CASE(ROLW)
9982   NODE_NAME_CASE(RORW)
9983   NODE_NAME_CASE(CLZW)
9984   NODE_NAME_CASE(CTZW)
9985   NODE_NAME_CASE(FSLW)
9986   NODE_NAME_CASE(FSRW)
9987   NODE_NAME_CASE(FSL)
9988   NODE_NAME_CASE(FSR)
9989   NODE_NAME_CASE(FMV_H_X)
9990   NODE_NAME_CASE(FMV_X_ANYEXTH)
9991   NODE_NAME_CASE(FMV_W_X_RV64)
9992   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
9993   NODE_NAME_CASE(FCVT_X)
9994   NODE_NAME_CASE(FCVT_XU)
9995   NODE_NAME_CASE(FCVT_W_RV64)
9996   NODE_NAME_CASE(FCVT_WU_RV64)
9997   NODE_NAME_CASE(STRICT_FCVT_W_RV64)
9998   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
9999   NODE_NAME_CASE(READ_CYCLE_WIDE)
10000   NODE_NAME_CASE(GREV)
10001   NODE_NAME_CASE(GREVW)
10002   NODE_NAME_CASE(GORC)
10003   NODE_NAME_CASE(GORCW)
10004   NODE_NAME_CASE(SHFL)
10005   NODE_NAME_CASE(SHFLW)
10006   NODE_NAME_CASE(UNSHFL)
10007   NODE_NAME_CASE(UNSHFLW)
10008   NODE_NAME_CASE(BFP)
10009   NODE_NAME_CASE(BFPW)
10010   NODE_NAME_CASE(BCOMPRESS)
10011   NODE_NAME_CASE(BCOMPRESSW)
10012   NODE_NAME_CASE(BDECOMPRESS)
10013   NODE_NAME_CASE(BDECOMPRESSW)
10014   NODE_NAME_CASE(VMV_V_X_VL)
10015   NODE_NAME_CASE(VFMV_V_F_VL)
10016   NODE_NAME_CASE(VMV_X_S)
10017   NODE_NAME_CASE(VMV_S_X_VL)
10018   NODE_NAME_CASE(VFMV_S_F_VL)
10019   NODE_NAME_CASE(SPLAT_VECTOR_I64)
10020   NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
10021   NODE_NAME_CASE(READ_VLENB)
10022   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
10023   NODE_NAME_CASE(VSLIDEUP_VL)
10024   NODE_NAME_CASE(VSLIDE1UP_VL)
10025   NODE_NAME_CASE(VSLIDEDOWN_VL)
10026   NODE_NAME_CASE(VSLIDE1DOWN_VL)
10027   NODE_NAME_CASE(VID_VL)
10028   NODE_NAME_CASE(VFNCVT_ROD_VL)
10029   NODE_NAME_CASE(VECREDUCE_ADD_VL)
10030   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
10031   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
10032   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
10033   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
10034   NODE_NAME_CASE(VECREDUCE_AND_VL)
10035   NODE_NAME_CASE(VECREDUCE_OR_VL)
10036   NODE_NAME_CASE(VECREDUCE_XOR_VL)
10037   NODE_NAME_CASE(VECREDUCE_FADD_VL)
10038   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
10039   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
10040   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
10041   NODE_NAME_CASE(ADD_VL)
10042   NODE_NAME_CASE(AND_VL)
10043   NODE_NAME_CASE(MUL_VL)
10044   NODE_NAME_CASE(OR_VL)
10045   NODE_NAME_CASE(SDIV_VL)
10046   NODE_NAME_CASE(SHL_VL)
10047   NODE_NAME_CASE(SREM_VL)
10048   NODE_NAME_CASE(SRA_VL)
10049   NODE_NAME_CASE(SRL_VL)
10050   NODE_NAME_CASE(SUB_VL)
10051   NODE_NAME_CASE(UDIV_VL)
10052   NODE_NAME_CASE(UREM_VL)
10053   NODE_NAME_CASE(XOR_VL)
10054   NODE_NAME_CASE(SADDSAT_VL)
10055   NODE_NAME_CASE(UADDSAT_VL)
10056   NODE_NAME_CASE(SSUBSAT_VL)
10057   NODE_NAME_CASE(USUBSAT_VL)
10058   NODE_NAME_CASE(FADD_VL)
10059   NODE_NAME_CASE(FSUB_VL)
10060   NODE_NAME_CASE(FMUL_VL)
10061   NODE_NAME_CASE(FDIV_VL)
10062   NODE_NAME_CASE(FNEG_VL)
10063   NODE_NAME_CASE(FABS_VL)
10064   NODE_NAME_CASE(FSQRT_VL)
10065   NODE_NAME_CASE(FMA_VL)
10066   NODE_NAME_CASE(FCOPYSIGN_VL)
10067   NODE_NAME_CASE(SMIN_VL)
10068   NODE_NAME_CASE(SMAX_VL)
10069   NODE_NAME_CASE(UMIN_VL)
10070   NODE_NAME_CASE(UMAX_VL)
10071   NODE_NAME_CASE(FMINNUM_VL)
10072   NODE_NAME_CASE(FMAXNUM_VL)
10073   NODE_NAME_CASE(MULHS_VL)
10074   NODE_NAME_CASE(MULHU_VL)
10075   NODE_NAME_CASE(FP_TO_SINT_VL)
10076   NODE_NAME_CASE(FP_TO_UINT_VL)
10077   NODE_NAME_CASE(SINT_TO_FP_VL)
10078   NODE_NAME_CASE(UINT_TO_FP_VL)
10079   NODE_NAME_CASE(FP_EXTEND_VL)
10080   NODE_NAME_CASE(FP_ROUND_VL)
10081   NODE_NAME_CASE(VWMUL_VL)
10082   NODE_NAME_CASE(VWMULU_VL)
10083   NODE_NAME_CASE(VWADDU_VL)
10084   NODE_NAME_CASE(SETCC_VL)
10085   NODE_NAME_CASE(VSELECT_VL)
10086   NODE_NAME_CASE(VMAND_VL)
10087   NODE_NAME_CASE(VMOR_VL)
10088   NODE_NAME_CASE(VMXOR_VL)
10089   NODE_NAME_CASE(VMCLR_VL)
10090   NODE_NAME_CASE(VMSET_VL)
10091   NODE_NAME_CASE(VRGATHER_VX_VL)
10092   NODE_NAME_CASE(VRGATHER_VV_VL)
10093   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
10094   NODE_NAME_CASE(VSEXT_VL)
10095   NODE_NAME_CASE(VZEXT_VL)
10096   NODE_NAME_CASE(VCPOP_VL)
10097   NODE_NAME_CASE(VLE_VL)
10098   NODE_NAME_CASE(VSE_VL)
10099   NODE_NAME_CASE(READ_CSR)
10100   NODE_NAME_CASE(WRITE_CSR)
10101   NODE_NAME_CASE(SWAP_CSR)
10102   }
10103   // clang-format on
10104   return nullptr;
10105 #undef NODE_NAME_CASE
10106 }
10107 
10108 /// getConstraintType - Given a constraint letter, return the type of
10109 /// constraint it is for this target.
10110 RISCVTargetLowering::ConstraintType
10111 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
10112   if (Constraint.size() == 1) {
10113     switch (Constraint[0]) {
10114     default:
10115       break;
10116     case 'f':
10117       return C_RegisterClass;
10118     case 'I':
10119     case 'J':
10120     case 'K':
10121       return C_Immediate;
10122     case 'A':
10123       return C_Memory;
10124     case 'S': // A symbolic address
10125       return C_Other;
10126     }
10127   } else {
10128     if (Constraint == "vr" || Constraint == "vm")
10129       return C_RegisterClass;
10130   }
10131   return TargetLowering::getConstraintType(Constraint);
10132 }
10133 
10134 std::pair<unsigned, const TargetRegisterClass *>
10135 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
10136                                                   StringRef Constraint,
10137                                                   MVT VT) const {
10138   // First, see if this is a constraint that directly corresponds to a
10139   // RISCV register class.
10140   if (Constraint.size() == 1) {
10141     switch (Constraint[0]) {
10142     case 'r':
10143       // TODO: Support fixed vectors up to XLen for P extension?
10144       if (VT.isVector())
10145         break;
10146       return std::make_pair(0U, &RISCV::GPRRegClass);
10147     case 'f':
10148       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
10149         return std::make_pair(0U, &RISCV::FPR16RegClass);
10150       if (Subtarget.hasStdExtF() && VT == MVT::f32)
10151         return std::make_pair(0U, &RISCV::FPR32RegClass);
10152       if (Subtarget.hasStdExtD() && VT == MVT::f64)
10153         return std::make_pair(0U, &RISCV::FPR64RegClass);
10154       break;
10155     default:
10156       break;
10157     }
10158   } else if (Constraint == "vr") {
10159     for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
10160                            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
10161       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
10162         return std::make_pair(0U, RC);
10163     }
10164   } else if (Constraint == "vm") {
10165     if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
10166       return std::make_pair(0U, &RISCV::VMV0RegClass);
10167   }
10168 
10169   // Clang will correctly decode the usage of register name aliases into their
10170   // official names. However, other frontends like `rustc` do not. This allows
10171   // users of these frontends to use the ABI names for registers in LLVM-style
10172   // register constraints.
10173   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
10174                                .Case("{zero}", RISCV::X0)
10175                                .Case("{ra}", RISCV::X1)
10176                                .Case("{sp}", RISCV::X2)
10177                                .Case("{gp}", RISCV::X3)
10178                                .Case("{tp}", RISCV::X4)
10179                                .Case("{t0}", RISCV::X5)
10180                                .Case("{t1}", RISCV::X6)
10181                                .Case("{t2}", RISCV::X7)
10182                                .Cases("{s0}", "{fp}", RISCV::X8)
10183                                .Case("{s1}", RISCV::X9)
10184                                .Case("{a0}", RISCV::X10)
10185                                .Case("{a1}", RISCV::X11)
10186                                .Case("{a2}", RISCV::X12)
10187                                .Case("{a3}", RISCV::X13)
10188                                .Case("{a4}", RISCV::X14)
10189                                .Case("{a5}", RISCV::X15)
10190                                .Case("{a6}", RISCV::X16)
10191                                .Case("{a7}", RISCV::X17)
10192                                .Case("{s2}", RISCV::X18)
10193                                .Case("{s3}", RISCV::X19)
10194                                .Case("{s4}", RISCV::X20)
10195                                .Case("{s5}", RISCV::X21)
10196                                .Case("{s6}", RISCV::X22)
10197                                .Case("{s7}", RISCV::X23)
10198                                .Case("{s8}", RISCV::X24)
10199                                .Case("{s9}", RISCV::X25)
10200                                .Case("{s10}", RISCV::X26)
10201                                .Case("{s11}", RISCV::X27)
10202                                .Case("{t3}", RISCV::X28)
10203                                .Case("{t4}", RISCV::X29)
10204                                .Case("{t5}", RISCV::X30)
10205                                .Case("{t6}", RISCV::X31)
10206                                .Default(RISCV::NoRegister);
10207   if (XRegFromAlias != RISCV::NoRegister)
10208     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
10209 
10210   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
10211   // TableGen record rather than the AsmName to choose registers for InlineAsm
10212   // constraints, plus we want to match those names to the widest floating point
10213   // register type available, manually select floating point registers here.
10214   //
10215   // The second case is the ABI name of the register, so that frontends can also
10216   // use the ABI names in register constraint lists.
10217   if (Subtarget.hasStdExtF()) {
10218     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
10219                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
10220                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
10221                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
10222                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
10223                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
10224                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
10225                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
10226                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
10227                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
10228                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
10229                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
10230                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
10231                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
10232                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
10233                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
10234                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
10235                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
10236                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
10237                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
10238                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
10239                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
10240                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
10241                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
10242                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
10243                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
10244                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
10245                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
10246                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
10247                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
10248                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
10249                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
10250                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
10251                         .Default(RISCV::NoRegister);
10252     if (FReg != RISCV::NoRegister) {
10253       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
10254       if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
10255         unsigned RegNo = FReg - RISCV::F0_F;
10256         unsigned DReg = RISCV::F0_D + RegNo;
10257         return std::make_pair(DReg, &RISCV::FPR64RegClass);
10258       }
10259       if (VT == MVT::f32 || VT == MVT::Other)
10260         return std::make_pair(FReg, &RISCV::FPR32RegClass);
10261       if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
10262         unsigned RegNo = FReg - RISCV::F0_F;
10263         unsigned HReg = RISCV::F0_H + RegNo;
10264         return std::make_pair(HReg, &RISCV::FPR16RegClass);
10265       }
10266     }
10267   }
10268 
10269   if (Subtarget.hasVInstructions()) {
10270     Register VReg = StringSwitch<Register>(Constraint.lower())
10271                         .Case("{v0}", RISCV::V0)
10272                         .Case("{v1}", RISCV::V1)
10273                         .Case("{v2}", RISCV::V2)
10274                         .Case("{v3}", RISCV::V3)
10275                         .Case("{v4}", RISCV::V4)
10276                         .Case("{v5}", RISCV::V5)
10277                         .Case("{v6}", RISCV::V6)
10278                         .Case("{v7}", RISCV::V7)
10279                         .Case("{v8}", RISCV::V8)
10280                         .Case("{v9}", RISCV::V9)
10281                         .Case("{v10}", RISCV::V10)
10282                         .Case("{v11}", RISCV::V11)
10283                         .Case("{v12}", RISCV::V12)
10284                         .Case("{v13}", RISCV::V13)
10285                         .Case("{v14}", RISCV::V14)
10286                         .Case("{v15}", RISCV::V15)
10287                         .Case("{v16}", RISCV::V16)
10288                         .Case("{v17}", RISCV::V17)
10289                         .Case("{v18}", RISCV::V18)
10290                         .Case("{v19}", RISCV::V19)
10291                         .Case("{v20}", RISCV::V20)
10292                         .Case("{v21}", RISCV::V21)
10293                         .Case("{v22}", RISCV::V22)
10294                         .Case("{v23}", RISCV::V23)
10295                         .Case("{v24}", RISCV::V24)
10296                         .Case("{v25}", RISCV::V25)
10297                         .Case("{v26}", RISCV::V26)
10298                         .Case("{v27}", RISCV::V27)
10299                         .Case("{v28}", RISCV::V28)
10300                         .Case("{v29}", RISCV::V29)
10301                         .Case("{v30}", RISCV::V30)
10302                         .Case("{v31}", RISCV::V31)
10303                         .Default(RISCV::NoRegister);
10304     if (VReg != RISCV::NoRegister) {
10305       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
10306         return std::make_pair(VReg, &RISCV::VMRegClass);
10307       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
10308         return std::make_pair(VReg, &RISCV::VRRegClass);
10309       for (const auto *RC :
10310            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
10311         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
10312           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
10313           return std::make_pair(VReg, RC);
10314         }
10315       }
10316     }
10317   }
10318 
10319   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10320 }
10321 
10322 unsigned
10323 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
10324   // Currently only support length 1 constraints.
10325   if (ConstraintCode.size() == 1) {
10326     switch (ConstraintCode[0]) {
10327     case 'A':
10328       return InlineAsm::Constraint_A;
10329     default:
10330       break;
10331     }
10332   }
10333 
10334   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
10335 }
10336 
10337 void RISCVTargetLowering::LowerAsmOperandForConstraint(
10338     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
10339     SelectionDAG &DAG) const {
10340   // Currently only support length 1 constraints.
10341   if (Constraint.length() == 1) {
10342     switch (Constraint[0]) {
10343     case 'I':
10344       // Validate & create a 12-bit signed immediate operand.
10345       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10346         uint64_t CVal = C->getSExtValue();
10347         if (isInt<12>(CVal))
10348           Ops.push_back(
10349               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10350       }
10351       return;
10352     case 'J':
10353       // Validate & create an integer zero operand.
10354       if (auto *C = dyn_cast<ConstantSDNode>(Op))
10355         if (C->getZExtValue() == 0)
10356           Ops.push_back(
10357               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
10358       return;
10359     case 'K':
10360       // Validate & create a 5-bit unsigned immediate operand.
10361       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10362         uint64_t CVal = C->getZExtValue();
10363         if (isUInt<5>(CVal))
10364           Ops.push_back(
10365               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10366       }
10367       return;
10368     case 'S':
10369       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
10370         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
10371                                                  GA->getValueType(0)));
10372       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
10373         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
10374                                                 BA->getValueType(0)));
10375       }
10376       return;
10377     default:
10378       break;
10379     }
10380   }
10381   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10382 }
10383 
10384 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
10385                                                    Instruction *Inst,
10386                                                    AtomicOrdering Ord) const {
10387   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
10388     return Builder.CreateFence(Ord);
10389   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
10390     return Builder.CreateFence(AtomicOrdering::Release);
10391   return nullptr;
10392 }
10393 
10394 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
10395                                                     Instruction *Inst,
10396                                                     AtomicOrdering Ord) const {
10397   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
10398     return Builder.CreateFence(AtomicOrdering::Acquire);
10399   return nullptr;
10400 }
10401 
10402 TargetLowering::AtomicExpansionKind
10403 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
10404   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
10405   // point operations can't be used in an lr/sc sequence without breaking the
10406   // forward-progress guarantee.
10407   if (AI->isFloatingPointOperation())
10408     return AtomicExpansionKind::CmpXChg;
10409 
10410   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10411   if (Size == 8 || Size == 16)
10412     return AtomicExpansionKind::MaskedIntrinsic;
10413   return AtomicExpansionKind::None;
10414 }
10415 
10416 static Intrinsic::ID
10417 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
10418   if (XLen == 32) {
10419     switch (BinOp) {
10420     default:
10421       llvm_unreachable("Unexpected AtomicRMW BinOp");
10422     case AtomicRMWInst::Xchg:
10423       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
10424     case AtomicRMWInst::Add:
10425       return Intrinsic::riscv_masked_atomicrmw_add_i32;
10426     case AtomicRMWInst::Sub:
10427       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
10428     case AtomicRMWInst::Nand:
10429       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
10430     case AtomicRMWInst::Max:
10431       return Intrinsic::riscv_masked_atomicrmw_max_i32;
10432     case AtomicRMWInst::Min:
10433       return Intrinsic::riscv_masked_atomicrmw_min_i32;
10434     case AtomicRMWInst::UMax:
10435       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
10436     case AtomicRMWInst::UMin:
10437       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
10438     }
10439   }
10440 
10441   if (XLen == 64) {
10442     switch (BinOp) {
10443     default:
10444       llvm_unreachable("Unexpected AtomicRMW BinOp");
10445     case AtomicRMWInst::Xchg:
10446       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
10447     case AtomicRMWInst::Add:
10448       return Intrinsic::riscv_masked_atomicrmw_add_i64;
10449     case AtomicRMWInst::Sub:
10450       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
10451     case AtomicRMWInst::Nand:
10452       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
10453     case AtomicRMWInst::Max:
10454       return Intrinsic::riscv_masked_atomicrmw_max_i64;
10455     case AtomicRMWInst::Min:
10456       return Intrinsic::riscv_masked_atomicrmw_min_i64;
10457     case AtomicRMWInst::UMax:
10458       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
10459     case AtomicRMWInst::UMin:
10460       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
10461     }
10462   }
10463 
10464   llvm_unreachable("Unexpected XLen\n");
10465 }
10466 
10467 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
10468     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10469     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10470   unsigned XLen = Subtarget.getXLen();
10471   Value *Ordering =
10472       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
10473   Type *Tys[] = {AlignedAddr->getType()};
10474   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
10475       AI->getModule(),
10476       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
10477 
10478   if (XLen == 64) {
10479     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10480     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10481     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10482   }
10483 
10484   Value *Result;
10485 
10486   // Must pass the shift amount needed to sign extend the loaded value prior
10487   // to performing a signed comparison for min/max. ShiftAmt is the number of
10488   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
10489   // is the number of bits to left+right shift the value in order to
10490   // sign-extend.
10491   if (AI->getOperation() == AtomicRMWInst::Min ||
10492       AI->getOperation() == AtomicRMWInst::Max) {
10493     const DataLayout &DL = AI->getModule()->getDataLayout();
10494     unsigned ValWidth =
10495         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10496     Value *SextShamt =
10497         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
10498     Result = Builder.CreateCall(LrwOpScwLoop,
10499                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10500   } else {
10501     Result =
10502         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10503   }
10504 
10505   if (XLen == 64)
10506     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10507   return Result;
10508 }
10509 
10510 TargetLowering::AtomicExpansionKind
10511 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
10512     AtomicCmpXchgInst *CI) const {
10513   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
10514   if (Size == 8 || Size == 16)
10515     return AtomicExpansionKind::MaskedIntrinsic;
10516   return AtomicExpansionKind::None;
10517 }
10518 
10519 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
10520     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10521     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10522   unsigned XLen = Subtarget.getXLen();
10523   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
10524   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
10525   if (XLen == 64) {
10526     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10527     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10528     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10529     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
10530   }
10531   Type *Tys[] = {AlignedAddr->getType()};
10532   Function *MaskedCmpXchg =
10533       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
10534   Value *Result = Builder.CreateCall(
10535       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
10536   if (XLen == 64)
10537     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10538   return Result;
10539 }
10540 
10541 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
10542   return false;
10543 }
10544 
10545 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
10546                                                EVT VT) const {
10547   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
10548     return false;
10549 
10550   switch (FPVT.getSimpleVT().SimpleTy) {
10551   case MVT::f16:
10552     return Subtarget.hasStdExtZfh();
10553   case MVT::f32:
10554     return Subtarget.hasStdExtF();
10555   case MVT::f64:
10556     return Subtarget.hasStdExtD();
10557   default:
10558     return false;
10559   }
10560 }
10561 
10562 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
10563   // If we are using the small code model, we can reduce size of jump table
10564   // entry to 4 bytes.
10565   if (Subtarget.is64Bit() && !isPositionIndependent() &&
10566       getTargetMachine().getCodeModel() == CodeModel::Small) {
10567     return MachineJumpTableInfo::EK_Custom32;
10568   }
10569   return TargetLowering::getJumpTableEncoding();
10570 }
10571 
10572 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
10573     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
10574     unsigned uid, MCContext &Ctx) const {
10575   assert(Subtarget.is64Bit() && !isPositionIndependent() &&
10576          getTargetMachine().getCodeModel() == CodeModel::Small);
10577   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
10578 }
10579 
10580 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
10581                                                      EVT VT) const {
10582   VT = VT.getScalarType();
10583 
10584   if (!VT.isSimple())
10585     return false;
10586 
10587   switch (VT.getSimpleVT().SimpleTy) {
10588   case MVT::f16:
10589     return Subtarget.hasStdExtZfh();
10590   case MVT::f32:
10591     return Subtarget.hasStdExtF();
10592   case MVT::f64:
10593     return Subtarget.hasStdExtD();
10594   default:
10595     break;
10596   }
10597 
10598   return false;
10599 }
10600 
10601 Register RISCVTargetLowering::getExceptionPointerRegister(
10602     const Constant *PersonalityFn) const {
10603   return RISCV::X10;
10604 }
10605 
10606 Register RISCVTargetLowering::getExceptionSelectorRegister(
10607     const Constant *PersonalityFn) const {
10608   return RISCV::X11;
10609 }
10610 
10611 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
10612   // Return false to suppress the unnecessary extensions if the LibCall
10613   // arguments or return value is f32 type for LP64 ABI.
10614   RISCVABI::ABI ABI = Subtarget.getTargetABI();
10615   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
10616     return false;
10617 
10618   return true;
10619 }
10620 
10621 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
10622   if (Subtarget.is64Bit() && Type == MVT::i32)
10623     return true;
10624 
10625   return IsSigned;
10626 }
10627 
10628 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
10629                                                  SDValue C) const {
10630   // Check integral scalar types.
10631   if (VT.isScalarInteger()) {
10632     // Omit the optimization if the sub target has the M extension and the data
10633     // size exceeds XLen.
10634     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
10635       return false;
10636     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10637       // Break the MUL to a SLLI and an ADD/SUB.
10638       const APInt &Imm = ConstNode->getAPIntValue();
10639       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10640           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10641         return true;
10642       // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
10643       if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
10644           ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
10645            (Imm - 8).isPowerOf2()))
10646         return true;
10647       // Omit the following optimization if the sub target has the M extension
10648       // and the data size >= XLen.
10649       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
10650         return false;
10651       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
10652       // a pair of LUI/ADDI.
10653       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
10654         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
10655         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
10656             (1 - ImmS).isPowerOf2())
10657         return true;
10658       }
10659     }
10660   }
10661 
10662   return false;
10663 }
10664 
10665 bool RISCVTargetLowering::isMulAddWithConstProfitable(
10666     const SDValue &AddNode, const SDValue &ConstNode) const {
10667   // Let the DAGCombiner decide for vectors.
10668   EVT VT = AddNode.getValueType();
10669   if (VT.isVector())
10670     return true;
10671 
10672   // Let the DAGCombiner decide for larger types.
10673   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
10674     return true;
10675 
10676   // It is worse if c1 is simm12 while c1*c2 is not.
10677   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
10678   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
10679   const APInt &C1 = C1Node->getAPIntValue();
10680   const APInt &C2 = C2Node->getAPIntValue();
10681   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
10682     return false;
10683 
10684   // Default to true and let the DAGCombiner decide.
10685   return true;
10686 }
10687 
10688 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
10689     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
10690     bool *Fast) const {
10691   if (!VT.isVector())
10692     return false;
10693 
10694   EVT ElemVT = VT.getVectorElementType();
10695   if (Alignment >= ElemVT.getStoreSize()) {
10696     if (Fast)
10697       *Fast = true;
10698     return true;
10699   }
10700 
10701   return false;
10702 }
10703 
10704 bool RISCVTargetLowering::splitValueIntoRegisterParts(
10705     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10706     unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
10707   bool IsABIRegCopy = CC.hasValue();
10708   EVT ValueVT = Val.getValueType();
10709   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
10710     // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
10711     // and cast to f32.
10712     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
10713     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
10714     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
10715                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
10716     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
10717     Parts[0] = Val;
10718     return true;
10719   }
10720 
10721   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
10722     LLVMContext &Context = *DAG.getContext();
10723     EVT ValueEltVT = ValueVT.getVectorElementType();
10724     EVT PartEltVT = PartVT.getVectorElementType();
10725     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
10726     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
10727     if (PartVTBitSize % ValueVTBitSize == 0) {
10728       assert(PartVTBitSize >= ValueVTBitSize);
10729       // If the element types are different, bitcast to the same element type of
10730       // PartVT first.
10731       // Give an example here, we want copy a <vscale x 1 x i8> value to
10732       // <vscale x 4 x i16>.
10733       // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
10734       // subvector, then we can bitcast to <vscale x 4 x i16>.
10735       if (ValueEltVT != PartEltVT) {
10736         if (PartVTBitSize > ValueVTBitSize) {
10737           unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
10738           assert(Count != 0 && "The number of element should not be zero.");
10739           EVT SameEltTypeVT =
10740               EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
10741           Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
10742                             DAG.getUNDEF(SameEltTypeVT), Val,
10743                             DAG.getVectorIdxConstant(0, DL));
10744         }
10745         Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
10746       } else {
10747         Val =
10748             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
10749                         Val, DAG.getVectorIdxConstant(0, DL));
10750       }
10751       Parts[0] = Val;
10752       return true;
10753     }
10754   }
10755   return false;
10756 }
10757 
10758 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
10759     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
10760     MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
10761   bool IsABIRegCopy = CC.hasValue();
10762   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
10763     SDValue Val = Parts[0];
10764 
10765     // Cast the f32 to i32, truncate to i16, and cast back to f16.
10766     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
10767     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
10768     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
10769     return Val;
10770   }
10771 
10772   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
10773     LLVMContext &Context = *DAG.getContext();
10774     SDValue Val = Parts[0];
10775     EVT ValueEltVT = ValueVT.getVectorElementType();
10776     EVT PartEltVT = PartVT.getVectorElementType();
10777     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
10778     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
10779     if (PartVTBitSize % ValueVTBitSize == 0) {
10780       assert(PartVTBitSize >= ValueVTBitSize);
10781       EVT SameEltTypeVT = ValueVT;
10782       // If the element types are different, convert it to the same element type
10783       // of PartVT.
10784       // Give an example here, we want copy a <vscale x 1 x i8> value from
10785       // <vscale x 4 x i16>.
10786       // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
10787       // then we can extract <vscale x 1 x i8>.
10788       if (ValueEltVT != PartEltVT) {
10789         unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
10790         assert(Count != 0 && "The number of element should not be zero.");
10791         SameEltTypeVT =
10792             EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
10793         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
10794       }
10795       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
10796                         DAG.getVectorIdxConstant(0, DL));
10797       return Val;
10798     }
10799   }
10800   return SDValue();
10801 }
10802 
10803 SDValue
10804 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10805                                    SelectionDAG &DAG,
10806                                    SmallVectorImpl<SDNode *> &Created) const {
10807   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
10808   if (isIntDivCheap(N->getValueType(0), Attr))
10809     return SDValue(N, 0); // Lower SDIV as SDIV
10810 
10811   assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
10812          "Unexpected divisor!");
10813 
10814   // Conditional move is needed, so do the transformation iff Zbt is enabled.
10815   if (!Subtarget.hasStdExtZbt())
10816     return SDValue();
10817 
10818   // When |Divisor| >= 2 ^ 12, it isn't profitable to do such transformation.
10819   // Besides, more critical path instructions will be generated when dividing
10820   // by 2. So we keep using the original DAGs for these cases.
10821   unsigned Lg2 = Divisor.countTrailingZeros();
10822   if (Lg2 == 1 || Lg2 >= 12)
10823     return SDValue();
10824 
10825   // fold (sdiv X, pow2)
10826   EVT VT = N->getValueType(0);
10827   if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
10828     return SDValue();
10829 
10830   SDLoc DL(N);
10831   SDValue N0 = N->getOperand(0);
10832   SDValue Zero = DAG.getConstant(0, DL, VT);
10833   SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
10834 
10835   // Add (N0 < 0) ? Pow2 - 1 : 0;
10836   SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
10837   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
10838   SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
10839 
10840   Created.push_back(Cmp.getNode());
10841   Created.push_back(Add.getNode());
10842   Created.push_back(Sel.getNode());
10843 
10844   // Divide by pow2.
10845   SDValue SRA =
10846       DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
10847 
10848   // If we're dividing by a positive value, we're done.  Otherwise, we must
10849   // negate the result.
10850   if (Divisor.isNonNegative())
10851     return SRA;
10852 
10853   Created.push_back(SRA.getNode());
10854   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
10855 }
10856 
10857 #define GET_REGISTER_MATCHER
10858 #include "RISCVGenAsmMatcher.inc"
10859 
10860 Register
10861 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
10862                                        const MachineFunction &MF) const {
10863   Register Reg = MatchRegisterAltName(RegName);
10864   if (Reg == RISCV::NoRegister)
10865     Reg = MatchRegisterName(RegName);
10866   if (Reg == RISCV::NoRegister)
10867     report_fatal_error(
10868         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
10869   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
10870   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
10871     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
10872                              StringRef(RegName) + "\"."));
10873   return Reg;
10874 }
10875 
10876 namespace llvm {
10877 namespace RISCVVIntrinsicsTable {
10878 
10879 #define GET_RISCVVIntrinsicsTable_IMPL
10880 #include "RISCVGenSearchableTables.inc"
10881 
10882 } // namespace RISCVVIntrinsicsTable
10883 
10884 } // namespace llvm
10885