1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/DiagnosticPrinter.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/KnownBits.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/raw_ostream.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49                                          const RISCVSubtarget &STI)
50     : TargetLowering(TM), Subtarget(STI) {
51 
52   if (Subtarget.isRV32E())
53     report_fatal_error("Codegen not yet implemented for RV32E");
54 
55   RISCVABI::ABI ABI = Subtarget.getTargetABI();
56   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59       !Subtarget.hasStdExtF()) {
60     errs() << "Hard-float 'f' ABI can't be used for a target that "
61                 "doesn't support the F instruction set extension (ignoring "
62                           "target-abi)\n";
63     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
64   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65              !Subtarget.hasStdExtD()) {
66     errs() << "Hard-float 'd' ABI can't be used for a target that "
67               "doesn't support the D instruction set extension (ignoring "
68               "target-abi)\n";
69     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
70   }
71 
72   switch (ABI) {
73   default:
74     report_fatal_error("Don't know how to lower this ABI");
75   case RISCVABI::ABI_ILP32:
76   case RISCVABI::ABI_ILP32F:
77   case RISCVABI::ABI_ILP32D:
78   case RISCVABI::ABI_LP64:
79   case RISCVABI::ABI_LP64F:
80   case RISCVABI::ABI_LP64D:
81     break;
82   }
83 
84   MVT XLenVT = Subtarget.getXLenVT();
85 
86   // Set up the register classes.
87   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89   if (Subtarget.hasStdExtZfh())
90     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91   if (Subtarget.hasStdExtF())
92     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93   if (Subtarget.hasStdExtD())
94     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96   static const MVT::SimpleValueType BoolVecVTs[] = {
97       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
98       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99   static const MVT::SimpleValueType IntVecVTs[] = {
100       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
101       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
102       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104       MVT::nxv4i64, MVT::nxv8i64};
105   static const MVT::SimpleValueType F16VecVTs[] = {
106       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
107       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108   static const MVT::SimpleValueType F32VecVTs[] = {
109       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110   static const MVT::SimpleValueType F64VecVTs[] = {
111       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113   if (Subtarget.hasVInstructions()) {
114     auto addRegClassForRVV = [this](MVT VT) {
115       unsigned Size = VT.getSizeInBits().getKnownMinValue();
116       assert(Size <= 512 && isPowerOf2_32(Size));
117       const TargetRegisterClass *RC;
118       if (Size <= 64)
119         RC = &RISCV::VRRegClass;
120       else if (Size == 128)
121         RC = &RISCV::VRM2RegClass;
122       else if (Size == 256)
123         RC = &RISCV::VRM4RegClass;
124       else
125         RC = &RISCV::VRM8RegClass;
126 
127       addRegisterClass(VT, RC);
128     };
129 
130     for (MVT VT : BoolVecVTs)
131       addRegClassForRVV(VT);
132     for (MVT VT : IntVecVTs) {
133       if (VT.getVectorElementType() == MVT::i64 &&
134           !Subtarget.hasVInstructionsI64())
135         continue;
136       addRegClassForRVV(VT);
137     }
138 
139     if (Subtarget.hasVInstructionsF16())
140       for (MVT VT : F16VecVTs)
141         addRegClassForRVV(VT);
142 
143     if (Subtarget.hasVInstructionsF32())
144       for (MVT VT : F32VecVTs)
145         addRegClassForRVV(VT);
146 
147     if (Subtarget.hasVInstructionsF64())
148       for (MVT VT : F64VecVTs)
149         addRegClassForRVV(VT);
150 
151     if (Subtarget.useRVVForFixedLengthVectors()) {
152       auto addRegClassForFixedVectors = [this](MVT VT) {
153         MVT ContainerVT = getContainerForFixedLengthVector(VT);
154         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156         addRegisterClass(VT, TRI.getRegClass(RCID));
157       };
158       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159         if (useRVVForFixedLengthVectorVT(VT))
160           addRegClassForFixedVectors(VT);
161 
162       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163         if (useRVVForFixedLengthVectorVT(VT))
164           addRegClassForFixedVectors(VT);
165     }
166   }
167 
168   // Compute derived properties from the register classes.
169   computeRegisterProperties(STI.getRegisterInfo());
170 
171   setStackPointerRegisterToSaveRestore(RISCV::X2);
172 
173   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
174     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
175 
176   // TODO: add all necessary setOperationAction calls.
177   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
178 
179   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
180   setOperationAction(ISD::BR_CC, XLenVT, Expand);
181   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
182   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
183 
184   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
185   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
186 
187   setOperationAction(ISD::VASTART, MVT::Other, Custom);
188   setOperationAction(ISD::VAARG, MVT::Other, Expand);
189   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
190   setOperationAction(ISD::VAEND, MVT::Other, Expand);
191 
192   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
193   if (!Subtarget.hasStdExtZbb()) {
194     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
195     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
196   }
197 
198   if (Subtarget.is64Bit()) {
199     setOperationAction(ISD::ADD, MVT::i32, Custom);
200     setOperationAction(ISD::SUB, MVT::i32, Custom);
201     setOperationAction(ISD::SHL, MVT::i32, Custom);
202     setOperationAction(ISD::SRA, MVT::i32, Custom);
203     setOperationAction(ISD::SRL, MVT::i32, Custom);
204 
205     setOperationAction(ISD::UADDO, MVT::i32, Custom);
206     setOperationAction(ISD::USUBO, MVT::i32, Custom);
207     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
208     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
209   } else {
210     setLibcallName(RTLIB::SHL_I128, nullptr);
211     setLibcallName(RTLIB::SRL_I128, nullptr);
212     setLibcallName(RTLIB::SRA_I128, nullptr);
213     setLibcallName(RTLIB::MUL_I128, nullptr);
214     setLibcallName(RTLIB::MULO_I64, nullptr);
215   }
216 
217   if (!Subtarget.hasStdExtM()) {
218     setOperationAction(ISD::MUL, XLenVT, Expand);
219     setOperationAction(ISD::MULHS, XLenVT, Expand);
220     setOperationAction(ISD::MULHU, XLenVT, Expand);
221     setOperationAction(ISD::SDIV, XLenVT, Expand);
222     setOperationAction(ISD::UDIV, XLenVT, Expand);
223     setOperationAction(ISD::SREM, XLenVT, Expand);
224     setOperationAction(ISD::UREM, XLenVT, Expand);
225   } else {
226     if (Subtarget.is64Bit()) {
227       setOperationAction(ISD::MUL, MVT::i32, Custom);
228       setOperationAction(ISD::MUL, MVT::i128, Custom);
229 
230       setOperationAction(ISD::SDIV, MVT::i8, Custom);
231       setOperationAction(ISD::UDIV, MVT::i8, Custom);
232       setOperationAction(ISD::UREM, MVT::i8, Custom);
233       setOperationAction(ISD::SDIV, MVT::i16, Custom);
234       setOperationAction(ISD::UDIV, MVT::i16, Custom);
235       setOperationAction(ISD::UREM, MVT::i16, Custom);
236       setOperationAction(ISD::SDIV, MVT::i32, Custom);
237       setOperationAction(ISD::UDIV, MVT::i32, Custom);
238       setOperationAction(ISD::UREM, MVT::i32, Custom);
239     } else {
240       setOperationAction(ISD::MUL, MVT::i64, Custom);
241     }
242   }
243 
244   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
245   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
246   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
247   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
248 
249   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
250   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
251   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
252 
253   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
254     if (Subtarget.is64Bit()) {
255       setOperationAction(ISD::ROTL, MVT::i32, Custom);
256       setOperationAction(ISD::ROTR, MVT::i32, Custom);
257     }
258   } else {
259     setOperationAction(ISD::ROTL, XLenVT, Expand);
260     setOperationAction(ISD::ROTR, XLenVT, Expand);
261   }
262 
263   if (Subtarget.hasStdExtZbp()) {
264     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
265     // more combining.
266     setOperationAction(ISD::BITREVERSE, XLenVT,   Custom);
267     setOperationAction(ISD::BSWAP,      XLenVT,   Custom);
268     setOperationAction(ISD::BITREVERSE, MVT::i8,  Custom);
269     // BSWAP i8 doesn't exist.
270     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
271     setOperationAction(ISD::BSWAP,      MVT::i16, Custom);
272 
273     if (Subtarget.is64Bit()) {
274       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
275       setOperationAction(ISD::BSWAP,      MVT::i32, Custom);
276     }
277   } else {
278     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
279     // pattern match it directly in isel.
280     setOperationAction(ISD::BSWAP, XLenVT,
281                        Subtarget.hasStdExtZbb() ? Legal : Expand);
282   }
283 
284   if (Subtarget.hasStdExtZbb()) {
285     setOperationAction(ISD::SMIN, XLenVT, Legal);
286     setOperationAction(ISD::SMAX, XLenVT, Legal);
287     setOperationAction(ISD::UMIN, XLenVT, Legal);
288     setOperationAction(ISD::UMAX, XLenVT, Legal);
289 
290     if (Subtarget.is64Bit()) {
291       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
292       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
293       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
294       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
295     }
296   } else {
297     setOperationAction(ISD::CTTZ, XLenVT, Expand);
298     setOperationAction(ISD::CTLZ, XLenVT, Expand);
299     setOperationAction(ISD::CTPOP, XLenVT, Expand);
300   }
301 
302   if (Subtarget.hasStdExtZbt()) {
303     setOperationAction(ISD::FSHL, XLenVT, Custom);
304     setOperationAction(ISD::FSHR, XLenVT, Custom);
305     setOperationAction(ISD::SELECT, XLenVT, Legal);
306 
307     if (Subtarget.is64Bit()) {
308       setOperationAction(ISD::FSHL, MVT::i32, Custom);
309       setOperationAction(ISD::FSHR, MVT::i32, Custom);
310     }
311   } else {
312     setOperationAction(ISD::SELECT, XLenVT, Custom);
313   }
314 
315   static const ISD::CondCode FPCCToExpand[] = {
316       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
317       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
318       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
319 
320   static const ISD::NodeType FPOpToExpand[] = {
321       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
322       ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
323 
324   if (Subtarget.hasStdExtZfh())
325     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
326 
327   if (Subtarget.hasStdExtZfh()) {
328     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
329     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
330     setOperationAction(ISD::LRINT, MVT::f16, Legal);
331     setOperationAction(ISD::LLRINT, MVT::f16, Legal);
332     setOperationAction(ISD::LROUND, MVT::f16, Legal);
333     setOperationAction(ISD::LLROUND, MVT::f16, Legal);
334     setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
335     setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
336     setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
337     setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
338     setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
339     setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
340     setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
341     setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
342     setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
343     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
344     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
345     setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
346     setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
347     setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
348     for (auto CC : FPCCToExpand)
349       setCondCodeAction(CC, MVT::f16, Expand);
350     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
351     setOperationAction(ISD::SELECT, MVT::f16, Custom);
352     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
353 
354     setOperationAction(ISD::FREM,       MVT::f16, Promote);
355     setOperationAction(ISD::FCEIL,      MVT::f16, Promote);
356     setOperationAction(ISD::FFLOOR,     MVT::f16, Promote);
357     setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
358     setOperationAction(ISD::FRINT,      MVT::f16, Promote);
359     setOperationAction(ISD::FROUND,     MVT::f16, Promote);
360     setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
361     setOperationAction(ISD::FTRUNC,     MVT::f16, Promote);
362     setOperationAction(ISD::FPOW,       MVT::f16, Promote);
363     setOperationAction(ISD::FPOWI,      MVT::f16, Promote);
364     setOperationAction(ISD::FCOS,       MVT::f16, Promote);
365     setOperationAction(ISD::FSIN,       MVT::f16, Promote);
366     setOperationAction(ISD::FSINCOS,    MVT::f16, Promote);
367     setOperationAction(ISD::FEXP,       MVT::f16, Promote);
368     setOperationAction(ISD::FEXP2,      MVT::f16, Promote);
369     setOperationAction(ISD::FLOG,       MVT::f16, Promote);
370     setOperationAction(ISD::FLOG2,      MVT::f16, Promote);
371     setOperationAction(ISD::FLOG10,     MVT::f16, Promote);
372 
373     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
374     // complete support for all operations in LegalizeDAG.
375 
376     // We need to custom promote this.
377     if (Subtarget.is64Bit())
378       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
379   }
380 
381   if (Subtarget.hasStdExtF()) {
382     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
383     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
384     setOperationAction(ISD::LRINT, MVT::f32, Legal);
385     setOperationAction(ISD::LLRINT, MVT::f32, Legal);
386     setOperationAction(ISD::LROUND, MVT::f32, Legal);
387     setOperationAction(ISD::LLROUND, MVT::f32, Legal);
388     setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
389     setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
390     setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
391     setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
392     setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
393     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
394     setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
395     setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
396     setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
397     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
398     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
399     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
400     for (auto CC : FPCCToExpand)
401       setCondCodeAction(CC, MVT::f32, Expand);
402     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
403     setOperationAction(ISD::SELECT, MVT::f32, Custom);
404     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
405     for (auto Op : FPOpToExpand)
406       setOperationAction(Op, MVT::f32, Expand);
407     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
408     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
409   }
410 
411   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
412     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
413 
414   if (Subtarget.hasStdExtD()) {
415     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
416     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
417     setOperationAction(ISD::LRINT, MVT::f64, Legal);
418     setOperationAction(ISD::LLRINT, MVT::f64, Legal);
419     setOperationAction(ISD::LROUND, MVT::f64, Legal);
420     setOperationAction(ISD::LLROUND, MVT::f64, Legal);
421     setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
422     setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
423     setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
424     setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
425     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
426     setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
427     setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
428     setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
429     setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
430     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
431     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
432     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
433     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
434     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
435     for (auto CC : FPCCToExpand)
436       setCondCodeAction(CC, MVT::f64, Expand);
437     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
438     setOperationAction(ISD::SELECT, MVT::f64, Custom);
439     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
440     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
441     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
442     for (auto Op : FPOpToExpand)
443       setOperationAction(Op, MVT::f64, Expand);
444     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
445     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
446   }
447 
448   if (Subtarget.is64Bit()) {
449     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
450     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
452     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
453   }
454 
455   if (Subtarget.hasStdExtF()) {
456     setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
457     setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
458 
459     setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
460     setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
461     setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
462     setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
463 
464     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
465     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
466   }
467 
468   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
469   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
470   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
471   setOperationAction(ISD::JumpTable, XLenVT, Custom);
472 
473   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
474 
475   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
476   // Unfortunately this can't be determined just from the ISA naming string.
477   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
478                      Subtarget.is64Bit() ? Legal : Custom);
479 
480   setOperationAction(ISD::TRAP, MVT::Other, Legal);
481   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
482   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
483   if (Subtarget.is64Bit())
484     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
485 
486   if (Subtarget.hasStdExtA()) {
487     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
488     setMinCmpXchgSizeInBits(32);
489   } else {
490     setMaxAtomicSizeInBitsSupported(0);
491   }
492 
493   setBooleanContents(ZeroOrOneBooleanContent);
494 
495   if (Subtarget.hasVInstructions()) {
496     setBooleanVectorContents(ZeroOrOneBooleanContent);
497 
498     setOperationAction(ISD::VSCALE, XLenVT, Custom);
499 
500     // RVV intrinsics may have illegal operands.
501     // We also need to custom legalize vmv.x.s.
502     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
503     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
504     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
505     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
506     if (Subtarget.is64Bit()) {
507       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
508     } else {
509       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
510       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
511     }
512 
513     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
514     setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
515 
516     static const unsigned IntegerVPOps[] = {
517         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
518         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
519         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
520         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
521         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
522         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
523         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
524         ISD::VP_SELECT};
525 
526     static const unsigned FloatingPointVPOps[] = {
527         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
528         ISD::VP_FDIV,        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
529         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT};
530 
531     if (!Subtarget.is64Bit()) {
532       // We must custom-lower certain vXi64 operations on RV32 due to the vector
533       // element type being illegal.
534       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
535       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
536 
537       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
538       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
539       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
540       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
541       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
542       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
543       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
544       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
545 
546       setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
547       setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
548       setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
549       setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
550       setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
551       setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
552       setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
553       setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
554     }
555 
556     for (MVT VT : BoolVecVTs) {
557       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
558 
559       // Mask VTs are custom-expanded into a series of standard nodes
560       setOperationAction(ISD::TRUNCATE, VT, Custom);
561       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
562       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
563       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
564 
565       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
566       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
567 
568       setOperationAction(ISD::SELECT, VT, Custom);
569       setOperationAction(ISD::SELECT_CC, VT, Expand);
570       setOperationAction(ISD::VSELECT, VT, Expand);
571       setOperationAction(ISD::VP_SELECT, VT, Expand);
572 
573       setOperationAction(ISD::VP_AND, VT, Custom);
574       setOperationAction(ISD::VP_OR, VT, Custom);
575       setOperationAction(ISD::VP_XOR, VT, Custom);
576 
577       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
578       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
579       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
580 
581       setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
582       setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
583       setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
584 
585       // RVV has native int->float & float->int conversions where the
586       // element type sizes are within one power-of-two of each other. Any
587       // wider distances between type sizes have to be lowered as sequences
588       // which progressively narrow the gap in stages.
589       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
590       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
591       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
592       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
593 
594       // Expand all extending loads to types larger than this, and truncating
595       // stores from types larger than this.
596       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
597         setTruncStoreAction(OtherVT, VT, Expand);
598         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
599         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
600         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
601       }
602     }
603 
604     for (MVT VT : IntVecVTs) {
605       if (VT.getVectorElementType() == MVT::i64 &&
606           !Subtarget.hasVInstructionsI64())
607         continue;
608 
609       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
610       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
611 
612       // Vectors implement MULHS/MULHU.
613       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
614       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
615 
616       setOperationAction(ISD::SMIN, VT, Legal);
617       setOperationAction(ISD::SMAX, VT, Legal);
618       setOperationAction(ISD::UMIN, VT, Legal);
619       setOperationAction(ISD::UMAX, VT, Legal);
620 
621       setOperationAction(ISD::ROTL, VT, Expand);
622       setOperationAction(ISD::ROTR, VT, Expand);
623 
624       setOperationAction(ISD::CTTZ, VT, Expand);
625       setOperationAction(ISD::CTLZ, VT, Expand);
626       setOperationAction(ISD::CTPOP, VT, Expand);
627 
628       setOperationAction(ISD::BSWAP, VT, Expand);
629 
630       // Custom-lower extensions and truncations from/to mask types.
631       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
632       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
633       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
634 
635       // RVV has native int->float & float->int conversions where the
636       // element type sizes are within one power-of-two of each other. Any
637       // wider distances between type sizes have to be lowered as sequences
638       // which progressively narrow the gap in stages.
639       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
640       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
641       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
642       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
643 
644       setOperationAction(ISD::SADDSAT, VT, Legal);
645       setOperationAction(ISD::UADDSAT, VT, Legal);
646       setOperationAction(ISD::SSUBSAT, VT, Legal);
647       setOperationAction(ISD::USUBSAT, VT, Legal);
648 
649       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
650       // nodes which truncate by one power of two at a time.
651       setOperationAction(ISD::TRUNCATE, VT, Custom);
652 
653       // Custom-lower insert/extract operations to simplify patterns.
654       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
655       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
656 
657       // Custom-lower reduction operations to set up the corresponding custom
658       // nodes' operands.
659       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
660       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
661       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
662       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
663       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
664       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
665       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
666       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
667 
668       for (unsigned VPOpc : IntegerVPOps)
669         setOperationAction(VPOpc, VT, Custom);
670 
671       setOperationAction(ISD::LOAD, VT, Custom);
672       setOperationAction(ISD::STORE, VT, Custom);
673 
674       setOperationAction(ISD::MLOAD, VT, Custom);
675       setOperationAction(ISD::MSTORE, VT, Custom);
676       setOperationAction(ISD::MGATHER, VT, Custom);
677       setOperationAction(ISD::MSCATTER, VT, Custom);
678 
679       setOperationAction(ISD::VP_LOAD, VT, Custom);
680       setOperationAction(ISD::VP_STORE, VT, Custom);
681       setOperationAction(ISD::VP_GATHER, VT, Custom);
682       setOperationAction(ISD::VP_SCATTER, VT, Custom);
683 
684       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
685       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
686       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
687 
688       setOperationAction(ISD::SELECT, VT, Custom);
689       setOperationAction(ISD::SELECT_CC, VT, Expand);
690 
691       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
692       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
693 
694       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
695         setTruncStoreAction(VT, OtherVT, Expand);
696         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
697         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
698         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
699       }
700 
701       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
702       // type that can represent the value exactly.
703       if (VT.getVectorElementType() != MVT::i64) {
704         MVT FloatEltVT =
705             VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
706         EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
707         if (isTypeLegal(FloatVT)) {
708           setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
709           setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
710         }
711       }
712     }
713 
714     // Expand various CCs to best match the RVV ISA, which natively supports UNE
715     // but no other unordered comparisons, and supports all ordered comparisons
716     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
717     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
718     // and we pattern-match those back to the "original", swapping operands once
719     // more. This way we catch both operations and both "vf" and "fv" forms with
720     // fewer patterns.
721     static const ISD::CondCode VFPCCToExpand[] = {
722         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
723         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
724         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
725     };
726 
727     // Sets common operation actions on RVV floating-point vector types.
728     const auto SetCommonVFPActions = [&](MVT VT) {
729       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
730       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
731       // sizes are within one power-of-two of each other. Therefore conversions
732       // between vXf16 and vXf64 must be lowered as sequences which convert via
733       // vXf32.
734       setOperationAction(ISD::FP_ROUND, VT, Custom);
735       setOperationAction(ISD::FP_EXTEND, VT, Custom);
736       // Custom-lower insert/extract operations to simplify patterns.
737       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
738       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
739       // Expand various condition codes (explained above).
740       for (auto CC : VFPCCToExpand)
741         setCondCodeAction(CC, VT, Expand);
742 
743       setOperationAction(ISD::FMINNUM, VT, Legal);
744       setOperationAction(ISD::FMAXNUM, VT, Legal);
745 
746       setOperationAction(ISD::FTRUNC, VT, Custom);
747       setOperationAction(ISD::FCEIL, VT, Custom);
748       setOperationAction(ISD::FFLOOR, VT, Custom);
749 
750       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
751       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
752       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
753       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
754 
755       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
756 
757       setOperationAction(ISD::LOAD, VT, Custom);
758       setOperationAction(ISD::STORE, VT, Custom);
759 
760       setOperationAction(ISD::MLOAD, VT, Custom);
761       setOperationAction(ISD::MSTORE, VT, Custom);
762       setOperationAction(ISD::MGATHER, VT, Custom);
763       setOperationAction(ISD::MSCATTER, VT, Custom);
764 
765       setOperationAction(ISD::VP_LOAD, VT, Custom);
766       setOperationAction(ISD::VP_STORE, VT, Custom);
767       setOperationAction(ISD::VP_GATHER, VT, Custom);
768       setOperationAction(ISD::VP_SCATTER, VT, Custom);
769 
770       setOperationAction(ISD::SELECT, VT, Custom);
771       setOperationAction(ISD::SELECT_CC, VT, Expand);
772 
773       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
774       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
775       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
776 
777       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
778 
779       for (unsigned VPOpc : FloatingPointVPOps)
780         setOperationAction(VPOpc, VT, Custom);
781     };
782 
783     // Sets common extload/truncstore actions on RVV floating-point vector
784     // types.
785     const auto SetCommonVFPExtLoadTruncStoreActions =
786         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
787           for (auto SmallVT : SmallerVTs) {
788             setTruncStoreAction(VT, SmallVT, Expand);
789             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
790           }
791         };
792 
793     if (Subtarget.hasVInstructionsF16())
794       for (MVT VT : F16VecVTs)
795         SetCommonVFPActions(VT);
796 
797     for (MVT VT : F32VecVTs) {
798       if (Subtarget.hasVInstructionsF32())
799         SetCommonVFPActions(VT);
800       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
801     }
802 
803     for (MVT VT : F64VecVTs) {
804       if (Subtarget.hasVInstructionsF64())
805         SetCommonVFPActions(VT);
806       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
807       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
808     }
809 
810     if (Subtarget.useRVVForFixedLengthVectors()) {
811       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
812         if (!useRVVForFixedLengthVectorVT(VT))
813           continue;
814 
815         // By default everything must be expanded.
816         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
817           setOperationAction(Op, VT, Expand);
818         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
819           setTruncStoreAction(VT, OtherVT, Expand);
820           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
821           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
822           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
823         }
824 
825         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
826         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
827         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
828 
829         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
830         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
831 
832         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
833         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
834 
835         setOperationAction(ISD::LOAD, VT, Custom);
836         setOperationAction(ISD::STORE, VT, Custom);
837 
838         setOperationAction(ISD::SETCC, VT, Custom);
839 
840         setOperationAction(ISD::SELECT, VT, Custom);
841 
842         setOperationAction(ISD::TRUNCATE, VT, Custom);
843 
844         setOperationAction(ISD::BITCAST, VT, Custom);
845 
846         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
847         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
848         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
849 
850         setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
851         setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
852         setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
853 
854         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
855         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
856         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
857         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
858 
859         // Operations below are different for between masks and other vectors.
860         if (VT.getVectorElementType() == MVT::i1) {
861           setOperationAction(ISD::VP_AND, VT, Custom);
862           setOperationAction(ISD::VP_OR, VT, Custom);
863           setOperationAction(ISD::VP_XOR, VT, Custom);
864           setOperationAction(ISD::AND, VT, Custom);
865           setOperationAction(ISD::OR, VT, Custom);
866           setOperationAction(ISD::XOR, VT, Custom);
867           continue;
868         }
869 
870         // Use SPLAT_VECTOR to prevent type legalization from destroying the
871         // splats when type legalizing i64 scalar on RV32.
872         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
873         // improvements first.
874         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
875           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
876           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
877         }
878 
879         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
880         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
881 
882         setOperationAction(ISD::MLOAD, VT, Custom);
883         setOperationAction(ISD::MSTORE, VT, Custom);
884         setOperationAction(ISD::MGATHER, VT, Custom);
885         setOperationAction(ISD::MSCATTER, VT, Custom);
886 
887         setOperationAction(ISD::VP_LOAD, VT, Custom);
888         setOperationAction(ISD::VP_STORE, VT, Custom);
889         setOperationAction(ISD::VP_GATHER, VT, Custom);
890         setOperationAction(ISD::VP_SCATTER, VT, Custom);
891 
892         setOperationAction(ISD::ADD, VT, Custom);
893         setOperationAction(ISD::MUL, VT, Custom);
894         setOperationAction(ISD::SUB, VT, Custom);
895         setOperationAction(ISD::AND, VT, Custom);
896         setOperationAction(ISD::OR, VT, Custom);
897         setOperationAction(ISD::XOR, VT, Custom);
898         setOperationAction(ISD::SDIV, VT, Custom);
899         setOperationAction(ISD::SREM, VT, Custom);
900         setOperationAction(ISD::UDIV, VT, Custom);
901         setOperationAction(ISD::UREM, VT, Custom);
902         setOperationAction(ISD::SHL, VT, Custom);
903         setOperationAction(ISD::SRA, VT, Custom);
904         setOperationAction(ISD::SRL, VT, Custom);
905 
906         setOperationAction(ISD::SMIN, VT, Custom);
907         setOperationAction(ISD::SMAX, VT, Custom);
908         setOperationAction(ISD::UMIN, VT, Custom);
909         setOperationAction(ISD::UMAX, VT, Custom);
910         setOperationAction(ISD::ABS,  VT, Custom);
911 
912         setOperationAction(ISD::MULHS, VT, Custom);
913         setOperationAction(ISD::MULHU, VT, Custom);
914 
915         setOperationAction(ISD::SADDSAT, VT, Custom);
916         setOperationAction(ISD::UADDSAT, VT, Custom);
917         setOperationAction(ISD::SSUBSAT, VT, Custom);
918         setOperationAction(ISD::USUBSAT, VT, Custom);
919 
920         setOperationAction(ISD::VSELECT, VT, Custom);
921         setOperationAction(ISD::SELECT_CC, VT, Expand);
922 
923         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
924         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
925         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
926 
927         // Custom-lower reduction operations to set up the corresponding custom
928         // nodes' operands.
929         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
930         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
931         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
932         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
933         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
934 
935         for (unsigned VPOpc : IntegerVPOps)
936           setOperationAction(VPOpc, VT, Custom);
937 
938         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
939         // type that can represent the value exactly.
940         if (VT.getVectorElementType() != MVT::i64) {
941           MVT FloatEltVT =
942               VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
943           EVT FloatVT =
944               MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
945           if (isTypeLegal(FloatVT)) {
946             setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
947             setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
948           }
949         }
950       }
951 
952       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
953         if (!useRVVForFixedLengthVectorVT(VT))
954           continue;
955 
956         // By default everything must be expanded.
957         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
958           setOperationAction(Op, VT, Expand);
959         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
960           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
961           setTruncStoreAction(VT, OtherVT, Expand);
962         }
963 
964         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
965         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
966         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
967 
968         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
969         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
970         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
971         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
972         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
973 
974         setOperationAction(ISD::LOAD, VT, Custom);
975         setOperationAction(ISD::STORE, VT, Custom);
976         setOperationAction(ISD::MLOAD, VT, Custom);
977         setOperationAction(ISD::MSTORE, VT, Custom);
978         setOperationAction(ISD::MGATHER, VT, Custom);
979         setOperationAction(ISD::MSCATTER, VT, Custom);
980 
981         setOperationAction(ISD::VP_LOAD, VT, Custom);
982         setOperationAction(ISD::VP_STORE, VT, Custom);
983         setOperationAction(ISD::VP_GATHER, VT, Custom);
984         setOperationAction(ISD::VP_SCATTER, VT, Custom);
985 
986         setOperationAction(ISD::FADD, VT, Custom);
987         setOperationAction(ISD::FSUB, VT, Custom);
988         setOperationAction(ISD::FMUL, VT, Custom);
989         setOperationAction(ISD::FDIV, VT, Custom);
990         setOperationAction(ISD::FNEG, VT, Custom);
991         setOperationAction(ISD::FABS, VT, Custom);
992         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
993         setOperationAction(ISD::FSQRT, VT, Custom);
994         setOperationAction(ISD::FMA, VT, Custom);
995         setOperationAction(ISD::FMINNUM, VT, Custom);
996         setOperationAction(ISD::FMAXNUM, VT, Custom);
997 
998         setOperationAction(ISD::FP_ROUND, VT, Custom);
999         setOperationAction(ISD::FP_EXTEND, VT, Custom);
1000 
1001         setOperationAction(ISD::FTRUNC, VT, Custom);
1002         setOperationAction(ISD::FCEIL, VT, Custom);
1003         setOperationAction(ISD::FFLOOR, VT, Custom);
1004 
1005         for (auto CC : VFPCCToExpand)
1006           setCondCodeAction(CC, VT, Expand);
1007 
1008         setOperationAction(ISD::VSELECT, VT, Custom);
1009         setOperationAction(ISD::SELECT, VT, Custom);
1010         setOperationAction(ISD::SELECT_CC, VT, Expand);
1011 
1012         setOperationAction(ISD::BITCAST, VT, Custom);
1013 
1014         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1015         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1016         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1017         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1018 
1019         for (unsigned VPOpc : FloatingPointVPOps)
1020           setOperationAction(VPOpc, VT, Custom);
1021       }
1022 
1023       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1024       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1025       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
1026       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
1027       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1028       setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1029       setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1030       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1031     }
1032   }
1033 
1034   // Function alignments.
1035   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
1036   setMinFunctionAlignment(FunctionAlignment);
1037   setPrefFunctionAlignment(FunctionAlignment);
1038 
1039   setMinimumJumpTableEntries(5);
1040 
1041   // Jumps are expensive, compared to logic
1042   setJumpIsExpensive();
1043 
1044   setTargetDAGCombine(ISD::ADD);
1045   setTargetDAGCombine(ISD::SUB);
1046   setTargetDAGCombine(ISD::AND);
1047   setTargetDAGCombine(ISD::OR);
1048   setTargetDAGCombine(ISD::XOR);
1049   setTargetDAGCombine(ISD::ANY_EXTEND);
1050   if (Subtarget.hasStdExtF()) {
1051     setTargetDAGCombine(ISD::ZERO_EXTEND);
1052     setTargetDAGCombine(ISD::FP_TO_SINT);
1053     setTargetDAGCombine(ISD::FP_TO_UINT);
1054     setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
1055     setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
1056   }
1057   if (Subtarget.hasVInstructions()) {
1058     setTargetDAGCombine(ISD::FCOPYSIGN);
1059     setTargetDAGCombine(ISD::MGATHER);
1060     setTargetDAGCombine(ISD::MSCATTER);
1061     setTargetDAGCombine(ISD::VP_GATHER);
1062     setTargetDAGCombine(ISD::VP_SCATTER);
1063     setTargetDAGCombine(ISD::SRA);
1064     setTargetDAGCombine(ISD::SRL);
1065     setTargetDAGCombine(ISD::SHL);
1066     setTargetDAGCombine(ISD::STORE);
1067   }
1068 }
1069 
1070 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1071                                             LLVMContext &Context,
1072                                             EVT VT) const {
1073   if (!VT.isVector())
1074     return getPointerTy(DL);
1075   if (Subtarget.hasVInstructions() &&
1076       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1077     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1078   return VT.changeVectorElementTypeToInteger();
1079 }
1080 
1081 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1082   return Subtarget.getXLenVT();
1083 }
1084 
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  // Describes the memory access performed by RISC-V target intrinsics so the
  // SelectionDAG builder can attach accurate MachineMemOperands. Returns true
  // iff \p Intrinsic touches memory and \p Info was filled in.
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    // All masked AMO/cmpxchg helpers read-modify-write a 4-byte-aligned word
    // through the pointer in argument 0; mark them volatile so the accesses
    // are neither reordered nor elided.
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  case Intrinsic::riscv_masked_strided_load:
    // Strided load: base pointer is argument 1. The access is modeled per
    // element (scalar type and its natural alignment); the total size is
    // unknown because the stride is not statically known.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT = getValueType(DL, I.getType()->getScalarType());
    Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::riscv_masked_strided_store:
    // Strided store: the value to store is argument 0 and the base pointer is
    // argument 1; modeled analogously to the strided load above.
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT =
        getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
    Info.align = Align(
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
        8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  }
}
1133 
1134 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1135                                                 const AddrMode &AM, Type *Ty,
1136                                                 unsigned AS,
1137                                                 Instruction *I) const {
1138   // No global is ever allowed as a base.
1139   if (AM.BaseGV)
1140     return false;
1141 
1142   // Require a 12-bit signed offset.
1143   if (!isInt<12>(AM.BaseOffs))
1144     return false;
1145 
1146   switch (AM.Scale) {
1147   case 0: // "r+i" or just "i", depending on HasBaseReg.
1148     break;
1149   case 1:
1150     if (!AM.HasBaseReg) // allow "r+i".
1151       break;
1152     return false; // disallow "r+r" or "r+r+i".
1153   default:
1154     return false;
1155   }
1156 
1157   return true;
1158 }
1159 
1160 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1161   return isInt<12>(Imm);
1162 }
1163 
1164 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1165   return isInt<12>(Imm);
1166 }
1167 
1168 // On RV32, 64-bit integers are split into their high and low parts and held
1169 // in two different registers, so the trunc is free since the low register can
1170 // just be used.
1171 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1172   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1173     return false;
1174   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1175   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1176   return (SrcBits == 64 && DestBits == 32);
1177 }
1178 
1179 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1180   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1181       !SrcVT.isInteger() || !DstVT.isInteger())
1182     return false;
1183   unsigned SrcBits = SrcVT.getSizeInBits();
1184   unsigned DestBits = DstVT.getSizeInBits();
1185   return (SrcBits == 64 && DestBits == 32);
1186 }
1187 
1188 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1189   // Zexts are free if they can be combined with a load.
1190   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1191   // poorly with type legalization of compares preferring sext.
1192   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1193     EVT MemVT = LD->getMemoryVT();
1194     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1195         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1196          LD->getExtensionType() == ISD::ZEXTLOAD))
1197       return true;
1198   }
1199 
1200   return TargetLowering::isZExtFree(Val, VT2);
1201 }
1202 
1203 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1204   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1205 }
1206 
1207 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1208   return Subtarget.hasStdExtZbb();
1209 }
1210 
1211 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1212   return Subtarget.hasStdExtZbb();
1213 }
1214 
1215 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1216   EVT VT = Y.getValueType();
1217 
1218   // FIXME: Support vectors once we have tests.
1219   if (VT.isVector())
1220     return false;
1221 
1222   return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
1223 }
1224 
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  // Sinking only pays off for vector instructions that RVV will lower; scalar
  // code, or targets without the V extension, gain nothing.
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  // Returns true when operand \p Operand of \p I can consume a splatted
  // scalar directly, i.e. the operation has a .vx/.vf instruction form for
  // that operand position.
  auto IsSinker = [&](Instruction *I, int Operand) {
    switch (I->getOpcode()) {
    // Commutative/simple binary ops and compares accept a scalar on either
    // side.
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    // Shifts, divisions and remainders only have a scalar form for the
    // second operand.
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        // fma has scalar forms for its two multiplicands only.
        case Intrinsic::fma:
          return Operand == 0 || Operand == 1;
        default:
          return false;
        }
      }
      return false;
    default:
      return false;
    }
  };

  for (auto OpIdx : enumerate(I->operands())) {
    if (!IsSinker(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!IsSinker(Insn, U.getOperandNo()))
        return false;
    }

    // Record both the insertelement (the scalar producer, operand 0 of the
    // shuffle) and the use of the shuffle itself so the whole splat moves.
    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
1300 
1301 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1302                                        bool ForCodeSize) const {
1303   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1304   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1305     return false;
1306   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1307     return false;
1308   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1309     return false;
1310   return Imm.isZero();
1311 }
1312 
1313 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1314   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1315          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1316          (VT == MVT::f64 && Subtarget.hasStdExtD());
1317 }
1318 
1319 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1320                                                       CallingConv::ID CC,
1321                                                       EVT VT) const {
1322   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1323   // We might still end up using a GPR but that will be decided based on ABI.
1324   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1325   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1326     return MVT::f32;
1327 
1328   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1329 }
1330 
1331 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1332                                                            CallingConv::ID CC,
1333                                                            EVT VT) const {
1334   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1335   // We might still end up using a GPR but that will be decided based on ABI.
1336   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1337   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1338     return 1;
1339 
1340   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1341 }
1342 
1343 // Changes the condition code and swaps operands if necessary, so the SetCC
1344 // operation matches one of the comparisons supported directly by branches
1345 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1346 // with 1/-1.
1347 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1348                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1349   // Convert X > -1 to X >= 0.
1350   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1351     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1352     CC = ISD::SETGE;
1353     return;
1354   }
1355   // Convert X < 1 to 0 >= X.
1356   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1357     RHS = LHS;
1358     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1359     CC = ISD::SETGE;
1360     return;
1361   }
1362 
1363   switch (CC) {
1364   default:
1365     break;
1366   case ISD::SETGT:
1367   case ISD::SETLE:
1368   case ISD::SETUGT:
1369   case ISD::SETULE:
1370     CC = ISD::getSetCCSwappedOperands(CC);
1371     std::swap(LHS, RHS);
1372     break;
1373   }
1374 }
1375 
1376 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1377   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1378   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1379   if (VT.getVectorElementType() == MVT::i1)
1380     KnownSize *= 8;
1381 
1382   switch (KnownSize) {
1383   default:
1384     llvm_unreachable("Invalid LMUL.");
1385   case 8:
1386     return RISCVII::VLMUL::LMUL_F8;
1387   case 16:
1388     return RISCVII::VLMUL::LMUL_F4;
1389   case 32:
1390     return RISCVII::VLMUL::LMUL_F2;
1391   case 64:
1392     return RISCVII::VLMUL::LMUL_1;
1393   case 128:
1394     return RISCVII::VLMUL::LMUL_2;
1395   case 256:
1396     return RISCVII::VLMUL::LMUL_4;
1397   case 512:
1398     return RISCVII::VLMUL::LMUL_8;
1399   }
1400 }
1401 
1402 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1403   switch (LMul) {
1404   default:
1405     llvm_unreachable("Invalid LMUL.");
1406   case RISCVII::VLMUL::LMUL_F8:
1407   case RISCVII::VLMUL::LMUL_F4:
1408   case RISCVII::VLMUL::LMUL_F2:
1409   case RISCVII::VLMUL::LMUL_1:
1410     return RISCV::VRRegClassID;
1411   case RISCVII::VLMUL::LMUL_2:
1412     return RISCV::VRM2RegClassID;
1413   case RISCVII::VLMUL::LMUL_4:
1414     return RISCV::VRM4RegClassID;
1415   case RISCVII::VLMUL::LMUL_8:
1416     return RISCV::VRM8RegClassID;
1417   }
1418 }
1419 
1420 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1421   RISCVII::VLMUL LMUL = getLMUL(VT);
1422   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1423       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1424       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1425       LMUL == RISCVII::VLMUL::LMUL_1) {
1426     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1427                   "Unexpected subreg numbering");
1428     return RISCV::sub_vrm1_0 + Index;
1429   }
1430   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1431     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1432                   "Unexpected subreg numbering");
1433     return RISCV::sub_vrm2_0 + Index;
1434   }
1435   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1436     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1437                   "Unexpected subreg numbering");
1438     return RISCV::sub_vrm4_0 + Index;
1439   }
1440   llvm_unreachable("Invalid vector type.");
1441 }
1442 
1443 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1444   if (VT.getVectorElementType() == MVT::i1)
1445     return RISCV::VRRegClassID;
1446   return getRegClassIDForLMUL(getLMUL(VT));
1447 }
1448 
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on the register class IDs increasing with LMUL.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the vector type and decide whether the target element index
      // lies in the low or high half of the current register group.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      // Compose the accumulated index with the subreg selecting that half.
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // When descending into the high half, rebase the element index so it
      // is relative to that half.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
1484 
1485 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1486 // stores for those types.
1487 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1488   return !Subtarget.useRVVForFixedLengthVectors() ||
1489          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1490 }
1491 
1492 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1493   if (ScalarTy->isPointerTy())
1494     return true;
1495 
1496   if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1497       ScalarTy->isIntegerTy(32))
1498     return true;
1499 
1500   if (ScalarTy->isIntegerTy(64))
1501     return Subtarget.hasVInstructionsI64();
1502 
1503   if (ScalarTy->isHalfTy())
1504     return Subtarget.hasVInstructionsF16();
1505   if (ScalarTy->isFloatTy())
1506     return Subtarget.hasVInstructionsF32();
1507   if (ScalarTy->isDoubleTy())
1508     return Subtarget.hasVInstructionsF64();
1509 
1510   return false;
1511 }
1512 
// Returns true if the fixed-length vector type VT should be lowered using the
// scalable RVV instruction set on this subtarget.
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    // Scale MinVLen down for the LMul computation below so mask vectors are
    // measured against an eighth of VLEN.
    // NOTE(review): presumably this accounts for mask bits vs. SEW-sized
    // elements — confirm against the RVV mask layout.
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    // 64-bit integer elements require the subtarget's I64 vector support.
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
    return false;

  // The register-group multiplier needed to hold VT in MinVLen-bit registers.
  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
1579 
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  // Forward to the file-local helper, supplying this target's subtarget.
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
1583 
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
  unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    // Scale the fixed element count into "per-RVV-block" units, then clamp
    // up to the minimum count implied by the smallest supported fractional
    // LMUL.
    unsigned NumElts =
        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
1618 
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // Convenience overload: pull the TargetLowering out of the DAG.
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}
1624 
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  // Public entry point: forward to the static helper using this lowering
  // object and its subtarget.
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
1628 
1629 // Grow V to consume an entire RVV register.
1630 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1631                                        const RISCVSubtarget &Subtarget) {
1632   assert(VT.isScalableVector() &&
1633          "Expected to convert into a scalable vector!");
1634   assert(V.getValueType().isFixedLengthVector() &&
1635          "Expected a fixed length vector operand!");
1636   SDLoc DL(V);
1637   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1638   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1639 }
1640 
1641 // Shrink V so it's just big enough to maintain a VT's worth of data.
1642 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1643                                          const RISCVSubtarget &Subtarget) {
1644   assert(VT.isFixedLengthVector() &&
1645          "Expected to convert into a fixed length vector!");
1646   assert(V.getValueType().isScalableVector() &&
1647          "Expected a scalable vector operand!");
1648   SDLoc DL(V);
1649   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1650   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1651 }
1652 
1653 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1654 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1655 // the vector type that it is contained in.
1656 static std::pair<SDValue, SDValue>
1657 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1658                 const RISCVSubtarget &Subtarget) {
1659   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1660   MVT XLenVT = Subtarget.getXLenVT();
1661   SDValue VL = VecVT.isFixedLengthVector()
1662                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1663                    : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1664   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1665   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1666   return {Mask, VL};
1667 }
1668 
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  // A scalable type is its own container, so pass it for both parameters;
  // the VL operand then becomes the VLMAX sentinel.
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
1676 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally decline, regardless of type or defined-element count.
  return false;
}
1689 
1690 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1691   // Only splats are currently supported.
1692   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1693     return true;
1694 
1695   return false;
1696 }
1697 
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // RISCV FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan. We can use a conversion instruction and fix the
  // nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);

  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
  unsigned Opc;
  if (SatVT == DstVT)
    // Saturation width matches the result width: use the plain conversions.
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    // i64 result saturated to i32: use the RV64 W-form conversions.
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  SDLoc DL(Op);
  // Convert with round-towards-zero, matching FP_TO_*INT truncation.
  SDValue FpToInt = DAG.getNode(
      Opc, DL, DstVT, Src,
      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));

  // Select 0 when Src is NaN (unordered with itself), otherwise the converted
  // value.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
1726 
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
// and back. Taking care to avoid converting values that are nan or already
// correct.
// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
// have FRM dependencies modeled yet.
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);

  // Freeze the source since we are increasing the number of uses.
  SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));

  // Truncate to integer and convert back to FP.
  MVT IntVT = VT.changeVectorElementTypeToInteger();
  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);

  // i1 vector type used by the setcc comparisons below.
  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());

  if (Op.getOpcode() == ISD::FCEIL) {
    // If the truncated value is the greater than or equal to the original
    // value, we've computed the ceil. Otherwise, we went the wrong way and
    // need to increase by 1.
    // FIXME: This should use a masked operation. Handle here or in isel?
    SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
                                 DAG.getConstantFP(1.0, DL, VT));
    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
  } else if (Op.getOpcode() == ISD::FFLOOR) {
    // If the truncated value is the less than or equal to the original value,
    // we've computed the floor. Otherwise, we went the wrong way and need to
    // decrease by 1.
    // FIXME: This should use a masked operation. Handle here or in isel?
    SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
                                 DAG.getConstantFP(1.0, DL, VT));
    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
  }

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted. That threshold is 2^(precision-1) for this format.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);

  // If abs(Src) was larger than MaxVal or nan, keep it.
  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
1786 
1787 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1788                                  const RISCVSubtarget &Subtarget) {
1789   MVT VT = Op.getSimpleValueType();
1790   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1791 
1792   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1793 
1794   SDLoc DL(Op);
1795   SDValue Mask, VL;
1796   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1797 
1798   unsigned Opc =
1799       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1800   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1801   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1802 }
1803 
// Describes a BUILD_VECTOR forming the arithmetic sequence
//   Addend + (i * StepNumerator) / StepDenominator
// for element index i, as matched by isSimpleVIDSequence below.
struct VIDSequence {
  // Numerator of the per-element step; may be negative, never zero.
  int64_t StepNumerator;
  // Positive denominator of the step (1 unless a fractional step matched).
  unsigned StepDenominator;
  // Constant offset added to every element.
  int64_t Addend;
};
1809 
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  // Only integer BUILD_VECTORs can be matched.
  if (!Op.getValueType().isInteger())
    return None;

  // Step and addend discovered so far; None until first established.
  Optional<unsigned> SeqStepDenom;
  Optional<int64_t> SeqStepNum, SeqAddend;
  // The most recent non-undef (value, index) pair seen.
  Optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;
    // The BUILD_VECTOR must be all constants.
    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
      return None;

    // Truncate the constant to the vector's element width.
    uint64_t Val = Op.getConstantOperandVal(Idx) &
                   maskTrailingOnes<uint64_t>(EltSizeInBits);

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff != 0) {
        int64_t Remainder = ValDiff % IdxDiff;
        // Normalize the step if it's greater than 1.
        if (Remainder != ValDiff) {
          // The difference must cleanly divide the element span.
          if (Remainder != 0)
            return None;
          ValDiff /= IdxDiff;
          IdxDiff = 1;
        }

        // Establish or check the step numerator.
        if (!SeqStepNum)
          SeqStepNum = ValDiff;
        else if (ValDiff != SeqStepNum)
          return None;

        // Establish or check the step denominator.
        if (!SeqStepDenom)
          SeqStepDenom = IdxDiff;
        else if (IdxDiff != *SeqStepDenom)
          return None;
      }
    }

    // Record and/or check any addend.
    if (SeqStepNum && SeqStepDenom) {
      // What this element should be under the matched step starting at 0;
      // the difference from the actual value is the sequence's addend.
      uint64_t ExpectedVal =
          (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
      int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
      if (!SeqAddend)
        SeqAddend = Addend;
      else if (SeqAddend != Addend)
        return None;
    }

    // Record this non-undef element for later. Only advance PrevElt when the
    // value changes, so a run of equal values (a fractional step) is measured
    // from the start of the run.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }
  // We need to have logged both a step and an addend for this to count as
  // a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
    return None;

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
1896 
1897 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1898                                  const RISCVSubtarget &Subtarget) {
1899   MVT VT = Op.getSimpleValueType();
1900   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1901 
1902   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1903 
1904   SDLoc DL(Op);
1905   SDValue Mask, VL;
1906   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1907 
1908   MVT XLenVT = Subtarget.getXLenVT();
1909   unsigned NumElts = Op.getNumOperands();
1910 
1911   if (VT.getVectorElementType() == MVT::i1) {
1912     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1913       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1914       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1915     }
1916 
1917     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1918       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1919       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1920     }
1921 
1922     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1923     // scalar integer chunks whose bit-width depends on the number of mask
1924     // bits and XLEN.
1925     // First, determine the most appropriate scalar integer type to use. This
1926     // is at most XLenVT, but may be shrunk to a smaller vector element type
1927     // according to the size of the final vector - use i8 chunks rather than
1928     // XLenVT if we're producing a v8i1. This results in more consistent
1929     // codegen across RV32 and RV64.
1930     unsigned NumViaIntegerBits =
1931         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1932     NumViaIntegerBits = std::min(NumViaIntegerBits,
1933                                  Subtarget.getMaxELENForFixedLengthVectors());
1934     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1935       // If we have to use more than one INSERT_VECTOR_ELT then this
1936       // optimization is likely to increase code size; avoid peforming it in
1937       // such a case. We can use a load from a constant pool in this case.
1938       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1939         return SDValue();
1940       // Now we can create our integer vector type. Note that it may be larger
1941       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1942       MVT IntegerViaVecVT =
1943           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1944                            divideCeil(NumElts, NumViaIntegerBits));
1945 
1946       uint64_t Bits = 0;
1947       unsigned BitPos = 0, IntegerEltIdx = 0;
1948       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1949 
1950       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1951         // Once we accumulate enough bits to fill our scalar type, insert into
1952         // our vector and clear our accumulated data.
1953         if (I != 0 && I % NumViaIntegerBits == 0) {
1954           if (NumViaIntegerBits <= 32)
1955             Bits = SignExtend64(Bits, 32);
1956           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1957           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1958                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1959           Bits = 0;
1960           BitPos = 0;
1961           IntegerEltIdx++;
1962         }
1963         SDValue V = Op.getOperand(I);
1964         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1965         Bits |= ((uint64_t)BitValue << BitPos);
1966       }
1967 
1968       // Insert the (remaining) scalar value into position in our integer
1969       // vector type.
1970       if (NumViaIntegerBits <= 32)
1971         Bits = SignExtend64(Bits, 32);
1972       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1973       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1974                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1975 
1976       if (NumElts < NumViaIntegerBits) {
1977         // If we're producing a smaller vector than our minimum legal integer
1978         // type, bitcast to the equivalent (known-legal) mask type, and extract
1979         // our final mask.
1980         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1981         Vec = DAG.getBitcast(MVT::v8i1, Vec);
1982         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1983                           DAG.getConstant(0, DL, XLenVT));
1984       } else {
1985         // Else we must have produced an integer type with the same size as the
1986         // mask type; bitcast for the final result.
1987         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1988         Vec = DAG.getBitcast(VT, Vec);
1989       }
1990 
1991       return Vec;
1992     }
1993 
1994     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1995     // vector type, we have a legal equivalently-sized i8 type, so we can use
1996     // that.
1997     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1998     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1999 
2000     SDValue WideVec;
2001     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2002       // For a splat, perform a scalar truncate before creating the wider
2003       // vector.
2004       assert(Splat.getValueType() == XLenVT &&
2005              "Unexpected type for i1 splat value");
2006       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2007                           DAG.getConstant(1, DL, XLenVT));
2008       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2009     } else {
2010       SmallVector<SDValue, 8> Ops(Op->op_values());
2011       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2012       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2013       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2014     }
2015 
2016     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2017   }
2018 
2019   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2020     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2021                                         : RISCVISD::VMV_V_X_VL;
2022     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
2023     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2024   }
2025 
2026   // Try and match index sequences, which we can lower to the vid instruction
2027   // with optional modifications. An all-undef vector is matched by
2028   // getSplatValue, above.
2029   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2030     int64_t StepNumerator = SimpleVID->StepNumerator;
2031     unsigned StepDenominator = SimpleVID->StepDenominator;
2032     int64_t Addend = SimpleVID->Addend;
2033 
2034     assert(StepNumerator != 0 && "Invalid step");
2035     bool Negate = false;
2036     int64_t SplatStepVal = StepNumerator;
2037     unsigned StepOpcode = ISD::MUL;
2038     if (StepNumerator != 1) {
2039       if (isPowerOf2_64(std::abs(StepNumerator))) {
2040         Negate = StepNumerator < 0;
2041         StepOpcode = ISD::SHL;
2042         SplatStepVal = Log2_64(std::abs(StepNumerator));
2043       }
2044     }
2045 
2046     // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
2047     // threshold since it's the immediate value many RVV instructions accept.
2048     // There is no vmul.vi instruction so ensure multiply constant can fit in
2049     // a single addi instruction.
2050     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2051          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2052         isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
2053       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2054       // Convert right out of the scalable type so we can use standard ISD
2055       // nodes for the rest of the computation. If we used scalable types with
2056       // these, we'd lose the fixed-length vector info and generate worse
2057       // vsetvli code.
2058       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2059       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2060           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2061         SDValue SplatStep = DAG.getSplatVector(
2062             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2063         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2064       }
2065       if (StepDenominator != 1) {
2066         SDValue SplatStep = DAG.getSplatVector(
2067             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2068         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2069       }
2070       if (Addend != 0 || Negate) {
2071         SDValue SplatAddend =
2072             DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2073         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2074       }
2075       return VID;
2076     }
2077   }
2078 
2079   // Attempt to detect "hidden" splats, which only reveal themselves as splats
2080   // when re-interpreted as a vector with a larger element type. For example,
2081   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2082   // could be instead splat as
2083   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
2084   // TODO: This optimization could also work on non-constant splats, but it
2085   // would require bit-manipulation instructions to construct the splat value.
2086   SmallVector<SDValue> Sequence;
2087   unsigned EltBitSize = VT.getScalarSizeInBits();
2088   const auto *BV = cast<BuildVectorSDNode>(Op);
2089   if (VT.isInteger() && EltBitSize < 64 &&
2090       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2091       BV->getRepeatedSequence(Sequence) &&
2092       (Sequence.size() * EltBitSize) <= 64) {
2093     unsigned SeqLen = Sequence.size();
2094     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2095     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2096     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2097             ViaIntVT == MVT::i64) &&
2098            "Unexpected sequence type");
2099 
2100     unsigned EltIdx = 0;
2101     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2102     uint64_t SplatValue = 0;
2103     // Construct the amalgamated value which can be splatted as this larger
2104     // vector type.
2105     for (const auto &SeqV : Sequence) {
2106       if (!SeqV.isUndef())
2107         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2108                        << (EltIdx * EltBitSize));
2109       EltIdx++;
2110     }
2111 
2112     // On RV64, sign-extend from 32 to 64 bits where possible in order to
2113     // achieve better constant materializion.
2114     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2115       SplatValue = SignExtend64(SplatValue, 32);
2116 
2117     // Since we can't introduce illegal i64 types at this stage, we can only
2118     // perform an i64 splat on RV32 if it is its own sign-extended value. That
2119     // way we can use RVV instructions to splat.
2120     assert((ViaIntVT.bitsLE(XLenVT) ||
2121             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2122            "Unexpected bitcast sequence");
2123     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2124       SDValue ViaVL =
2125           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2126       MVT ViaContainerVT =
2127           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2128       SDValue Splat =
2129           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2130                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2131       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2132       return DAG.getBitcast(VT, Splat);
2133     }
2134   }
2135 
2136   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2137   // which constitute a large proportion of the elements. In such cases we can
2138   // splat a vector with the dominant element and make up the shortfall with
2139   // INSERT_VECTOR_ELTs.
2140   // Note that this includes vectors of 2 elements by association. The
2141   // upper-most element is the "dominant" one, allowing us to use a splat to
2142   // "insert" the upper element, and an insert of the lower element at position
2143   // 0, which improves codegen.
2144   SDValue DominantValue;
2145   unsigned MostCommonCount = 0;
2146   DenseMap<SDValue, unsigned> ValueCounts;
2147   unsigned NumUndefElts =
2148       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2149 
2150   // Track the number of scalar loads we know we'd be inserting, estimated as
2151   // any non-zero floating-point constant. Other kinds of element are either
2152   // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materialization and
2154   // vector-insertion instructions is not known.
2155   unsigned NumScalarLoads = 0;
2156 
2157   for (SDValue V : Op->op_values()) {
2158     if (V.isUndef())
2159       continue;
2160 
2161     ValueCounts.insert(std::make_pair(V, 0));
2162     unsigned &Count = ValueCounts[V];
2163 
2164     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2165       NumScalarLoads += !CFP->isExactlyValue(+0.0);
2166 
2167     // Is this value dominant? In case of a tie, prefer the highest element as
2168     // it's cheaper to insert near the beginning of a vector than it is at the
2169     // end.
2170     if (++Count >= MostCommonCount) {
2171       DominantValue = V;
2172       MostCommonCount = Count;
2173     }
2174   }
2175 
2176   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2177   unsigned NumDefElts = NumElts - NumUndefElts;
2178   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2179 
2180   // Don't perform this optimization when optimizing for size, since
2181   // materializing elements and inserting them tends to cause code bloat.
2182   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2183       ((MostCommonCount > DominantValueCountThreshold) ||
2184        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2185     // Start by splatting the most common element.
2186     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2187 
2188     DenseSet<SDValue> Processed{DominantValue};
2189     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2190     for (const auto &OpIdx : enumerate(Op->ops())) {
2191       const SDValue &V = OpIdx.value();
2192       if (V.isUndef() || !Processed.insert(V).second)
2193         continue;
2194       if (ValueCounts[V] == 1) {
2195         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2196                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
2197       } else {
2198         // Blend in all instances of this value using a VSELECT, using a
2199         // mask where each bit signals whether that element is the one
2200         // we're after.
2201         SmallVector<SDValue> Ops;
2202         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2203           return DAG.getConstant(V == V1, DL, XLenVT);
2204         });
2205         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2206                           DAG.getBuildVector(SelMaskTy, DL, Ops),
2207                           DAG.getSplatBuildVector(VT, DL, V), Vec);
2208       }
2209     }
2210 
2211     return Vec;
2212   }
2213 
2214   return SDValue();
2215 }
2216 
2217 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
2218                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
2219   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2220     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2221     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2222     // If Hi constant is all the same sign bit as Lo, lower this as a custom
2223     // node in order to try and match RVV vector/scalar instructions.
2224     if ((LoC >> 31) == HiC)
2225       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2226 
2227     // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
2228     // vmv.v.x whose EEW = 32 to lower it.
2229     auto *Const = dyn_cast<ConstantSDNode>(VL);
2230     if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
2231       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2232       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2233       // access the subtarget here now.
2234       auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
2235       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2236     }
2237   }
2238 
2239   // Fall back to a stack store and stride x0 vector load.
2240   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
2241 }
2242 
2243 // Called by type legalization to handle splat of i64 on RV32.
2244 // FIXME: We can optimize this when the type has sign or zero bits in one
2245 // of the halves.
2246 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2247                                    SDValue VL, SelectionDAG &DAG) {
2248   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2249   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2250                            DAG.getConstant(0, DL, MVT::i32));
2251   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2252                            DAG.getConstant(1, DL, MVT::i32));
2253   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2254 }
2255 
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization. Floating-point scalars use
// vfmv.s.f/vfmv.v.f; integer scalars are promoted to XLenVT and use
// vmv.s.x/vmv.v.x, with i64-on-RV32 handled by splitting into halves.
static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
                                SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  if (VT.isFloatingPoint()) {
    // If VL is 1, we could use vfmv.s.f.
    if (isOneConstant(VL))
      return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
                         Scalar, VL);
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    // Re-query the constant after extension; the extend may have folded.
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
    // If VL is 1 and the scalar value won't benefit from immediate, we could
    // use vmv.s.x.
    if (isOneConstant(VL) &&
        (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
                         VL);
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
  }

  // Only reachable for an i64 scalar on RV32.
  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

  // A zero splat with VL=1 needs no split: vmv.s.x with a zero scalar.
  if (isOneConstant(VL) && isNullConstant(Scalar))
    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
                       DAG.getConstant(0, DL, XLenVT), VL);

  // Otherwise use the more complicated splatting algorithm.
  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
}
2301 
2302 // Is the mask a slidedown that shifts in undefs.
2303 static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
2304   int Size = Mask.size();
2305 
2306   // Elements shifted in should be undef.
2307   auto CheckUndefs = [&](int Shift) {
2308     for (int i = Size - Shift; i != Size; ++i)
2309       if (Mask[i] >= 0)
2310         return false;
2311     return true;
2312   };
2313 
2314   // Elements should be shifted or undef.
2315   auto MatchShift = [&](int Shift) {
2316     for (int i = 0; i != Size - Shift; ++i)
2317        if (Mask[i] >= 0 && Mask[i] != Shift + i)
2318          return false;
2319     return true;
2320   };
2321 
2322   // Try all possible shifts.
2323   for (int Shift = 1; Shift != Size; ++Shift)
2324     if (CheckUndefs(Shift) && MatchShift(Shift))
2325       return Shift;
2326 
2327   // No match.
2328   return -1;
2329 }
2330 
2331 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2332                                 const RISCVSubtarget &Subtarget) {
2333   // We need to be able to widen elements to the next larger integer type.
2334   if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
2335     return false;
2336 
2337   int Size = Mask.size();
2338   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2339 
2340   int Srcs[] = {-1, -1};
2341   for (int i = 0; i != Size; ++i) {
2342     // Ignore undef elements.
2343     if (Mask[i] < 0)
2344       continue;
2345 
2346     // Is this an even or odd element.
2347     int Pol = i % 2;
2348 
2349     // Ensure we consistently use the same source for this element polarity.
2350     int Src = Mask[i] / Size;
2351     if (Srcs[Pol] < 0)
2352       Srcs[Pol] = Src;
2353     if (Srcs[Pol] != Src)
2354       return false;
2355 
2356     // Make sure the element within the source is appropriate for this element
2357     // in the destination.
2358     int Elt = Mask[i] % Size;
2359     if (Elt != i / 2)
2360       return false;
2361   }
2362 
2363   // We need to find a source for each polarity and they can't be the same.
2364   if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2365     return false;
2366 
2367   // Swap the sources if the second source was in the even polarity.
2368   SwapSources = Srcs[0] > Srcs[1];
2369 
2370   return true;
2371 }
2372 
// Lower a fixed-length VECTOR_SHUFFLE. Strategies are tried in order:
//  1) splat shuffles, via a strided/scalar load or a vrgather.vx;
//  2) a single vslidedown when the mask only shifts in undefs;
//  3) an interleave of two sources built from vwaddu/vwmulu (combining to
//     vwmaccu in isel);
//  4) a plain vselect when every lane keeps its own index;
//  5) a general vrgather, optionally blended with a second vrgather for the
//     other source operand.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue TrueMask, VL;
  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        // Convert the element offset into a byte offset from the base pointer.
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                                   TypeSize::Fixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT), VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          // Preserve the original load's ordering relative to other memory ops.
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      // Not a splatted load: broadcast the lane with a vrgather.vx.
      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  ArrayRef<int> Mask = SVN->getMask();

  // Try to match as a slidedown.
  int SlideAmt = matchShuffleAsSlideDown(Mask);
  if (SlideAmt >= 0) {
    // TODO: Should we reduce the VL to account for the upper undef elements?
    // Requires additional vsetvlis, but might be faster to execute.
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    SDValue SlideDown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), V1,
                    DAG.getConstant(SlideAmt, DL, XLenVT),
                    TrueMask, VL);
    return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
  }

  // Detect an interleave shuffle and lower to
  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
  bool SwapSources;
  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
    // Swap sources if needed.
    if (SwapSources)
      std::swap(V1, V2);

    // Extract the lower half of the vectors.
    MVT HalfVT = VT.getHalfNumVectorElementsVT();
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                     DAG.getConstant(0, DL, XLenVT));
    V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
                     DAG.getConstant(0, DL, XLenVT));

    // Double the element width and halve the number of elements in an int type.
    unsigned EltBits = VT.getScalarSizeInBits();
    MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
    MVT WideIntVT =
        MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
    // Convert this to a scalable vector. We need to base this on the
    // destination size to ensure there's always a type with a smaller LMUL.
    MVT WideIntContainerVT =
        getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);

    // Convert sources to scalable vectors with the same element count as the
    // larger type.
    MVT HalfContainerVT = MVT::getVectorVT(
        VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
    V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
    V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);

    // Cast sources to integer.
    MVT IntEltVT = MVT::getIntegerVT(EltBits);
    MVT IntHalfVT =
        MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
    V1 = DAG.getBitcast(IntHalfVT, V1);
    V2 = DAG.getBitcast(IntHalfVT, V2);

    // Freeze V2 since we use it twice and we need to be sure that the add and
    // multiply see the same value.
    V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);

    // Recreate TrueMask using the widened type's element count.
    MVT MaskVT =
        MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
    TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

    // Widen V1 and V2 with 0s and add one copy of V2 to V1.
    SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
                              V2, TrueMask, VL);
    // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
    SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
                                     DAG.getAllOnesConstant(DL, XLenVT));
    SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
                                   V2, Multiplier, TrueMask, VL);
    // Add the new copies to our previous addition giving us 2^eltbits copies of
    // V2. This is equivalent to shifting V2 left by eltbits. This should
    // combine with the vwmulu.vv above to form vwmaccu.vv.
    Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
                      TrueMask, VL);
    // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
    // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
    // vector VT.
    ContainerVT =
        MVT::getVectorVT(VT.getVectorElementType(),
                         WideIntContainerVT.getVectorElementCount() * 2);
    Add = DAG.getBitcast(ContainerVT, Add);
    return convertFromScalableVector(VT, Add, DAG, Subtarget);
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vectors.
  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
  // half.
  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
      GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
                                     : DAG.getUNDEF(XLenVT));
      GatherIndicesRHS.push_back(
          IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
                            : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
      if (IsLHSOrUndefIndex && MaskIndex >= 0)
        ++LHSIndexCounts[MaskIndex];
      if (!IsLHSOrUndefIndex)
        ++RHSIndexCounts[MaskIndex - NumElts];
    }
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather =
          DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                      DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
    } else {
      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
      LHSIndices =
          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           TrueMask, VL);
    }
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
      V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
                       VL);
    }

    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
                         Gather, VL);
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
2680 
2681 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2682                                      SDLoc DL, SelectionDAG &DAG,
2683                                      const RISCVSubtarget &Subtarget) {
2684   if (VT.isScalableVector())
2685     return DAG.getFPExtendOrRound(Op, DL, VT);
2686   assert(VT.isFixedLengthVector() &&
2687          "Unexpected value type for RVV FP extend/round lowering");
2688   SDValue Mask, VL;
2689   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2690   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2691                         ? RISCVISD::FP_EXTEND_VL
2692                         : RISCVISD::FP_ROUND_VL;
2693   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2694 }
2695 
2696 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2697 // the exponent.
2698 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2699   MVT VT = Op.getSimpleValueType();
2700   unsigned EltSize = VT.getScalarSizeInBits();
2701   SDValue Src = Op.getOperand(0);
2702   SDLoc DL(Op);
2703 
2704   // We need a FP type that can represent the value.
2705   // TODO: Use f16 for i8 when possible?
2706   MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2707   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2708 
2709   // Legal types should have been checked in the RISCVTargetLowering
2710   // constructor.
2711   // TODO: Splitting may make sense in some cases.
2712   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2713          "Expected legal float type!");
2714 
2715   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2716   // The trailing zero count is equal to log2 of this single bit value.
2717   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2718     SDValue Neg =
2719         DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2720     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2721   }
2722 
2723   // We have a legal FP type, convert to it.
2724   SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2725   // Bitcast to integer and shift the exponent to the LSB.
2726   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2727   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2728   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2729   SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2730                               DAG.getConstant(ShiftAmt, DL, IntVT));
2731   // Truncate back to original type to allow vnsrl.
2732   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2733   // The exponent contains log2 of the value in biased form.
2734   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2735 
2736   // For trailing zeros, we just need to subtract the bias.
2737   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2738     return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2739                        DAG.getConstant(ExponentBias, DL, VT));
2740 
2741   // For leading zeros, we need to remove the bias and convert from log2 to
2742   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
2743   unsigned Adjust = ExponentBias + (EltSize - 1);
2744   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2745 }
2746 
2747 // While RVV has alignment restrictions, we should always be able to load as a
2748 // legal equivalently-sized byte-typed vector instead. This method is
2749 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2750 // the load is already correctly-aligned, it returns SDValue().
2751 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2752                                                     SelectionDAG &DAG) const {
2753   auto *Load = cast<LoadSDNode>(Op);
2754   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2755 
2756   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2757                                      Load->getMemoryVT(),
2758                                      *Load->getMemOperand()))
2759     return SDValue();
2760 
2761   SDLoc DL(Op);
2762   MVT VT = Op.getSimpleValueType();
2763   unsigned EltSizeBits = VT.getScalarSizeInBits();
2764   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2765          "Unexpected unaligned RVV load type");
2766   MVT NewVT =
2767       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2768   assert(NewVT.isValid() &&
2769          "Expecting equally-sized RVV vector types to be legal");
2770   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2771                           Load->getPointerInfo(), Load->getOriginalAlign(),
2772                           Load->getMemOperand()->getFlags());
2773   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2774 }
2775 
2776 // While RVV has alignment restrictions, we should always be able to store as a
2777 // legal equivalently-sized byte-typed vector instead. This method is
2778 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
2779 // returns SDValue() if the store is already correctly aligned.
2780 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2781                                                      SelectionDAG &DAG) const {
2782   auto *Store = cast<StoreSDNode>(Op);
2783   assert(Store && Store->getValue().getValueType().isVector() &&
2784          "Expected vector store");
2785 
2786   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2787                                      Store->getMemoryVT(),
2788                                      *Store->getMemOperand()))
2789     return SDValue();
2790 
2791   SDLoc DL(Op);
2792   SDValue StoredVal = Store->getValue();
2793   MVT VT = StoredVal.getSimpleValueType();
2794   unsigned EltSizeBits = VT.getScalarSizeInBits();
2795   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2796          "Unexpected unaligned RVV store type");
2797   MVT NewVT =
2798       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2799   assert(NewVT.isValid() &&
2800          "Expecting equally-sized RVV vector types to be legal");
2801   StoredVal = DAG.getBitcast(NewVT, StoredVal);
2802   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2803                       Store->getPointerInfo(), Store->getOriginalAlign(),
2804                       Store->getMemOperand()->getFlags());
2805 }
2806 
// Central dispatch for every operation this target registered with a "Custom"
// legalization action. Each case returns a replacement node, the original Op
// (meaning it is fine as-is), or SDValue() to fall back to default
// legalization.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    // An opcode reaching here was marked Custom without a matching case.
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT.isFixedLengthVector()) {
      // We can handle fixed length vector bitcasts with a simple replacement
      // in isel.
      if (Op0VT.isFixedLengthVector())
        return Op;
      // When bitcasting from scalar to fixed-length vector, insert the scalar
      // into a one-element vector of the result type, and perform a vector
      // bitcast.
      if (!Op0VT.isVector()) {
        EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
        if (!isTypeLegal(BVT))
          return SDValue();
        return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
                                              DAG.getUNDEF(BVT), Op0,
                                              DAG.getConstant(0, DL, XLenVT)));
      }
      return SDValue();
    }
    // Custom-legalize bitcasts from fixed-length vector types to scalar types
    // thus: bitcast the vector to a one-element vector type whose element type
    // is the same as the result type, and extract the first element.
    if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (!isTypeLegal(BVT))
        return SDValue();
      SDValue BVec = DAG.getBitcast(BVT, Op0);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                         DAG.getConstant(0, DL, XLenVT));
    }
    // Scalar FP<->int bitcasts go through the FMV instructions, extending the
    // integer operand to XLen first.
    if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    }
    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtF()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
                       DAG.getConstant(Imm, DL, VT));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log(XLen) bits. Mask the shift amount accordingly to prevent
    // accidentally setting the extra bit.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    // fshl and fshr concatenate their operands in the same order. fsr and fsl
    // instruction use different orders. fshl will return its first operand for
    // shift of zero, fshr will return its second operand. fsl and fsr both
    // return rs1 so the ISD nodes need to have different operand orders.
    // Shift amount is in rs2.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    unsigned Opc = RISCVISD::FSL;
    if (Op.getOpcode() == ISD::FSHR) {
      std::swap(Op0, Op1);
      Opc = RISCVISD::FSR;
    }
    return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    // Fixed-length vectors are operated on inside their scalable container.
    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    // Halve the element width on each iteration until we reach the
    // destination element type.
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
    if (isa<ConstantSDNode>(Op.getOperand(0))) {
      // We assume VLENB is a multiple of 8. We manually choose the best shift
      // here because SimplifyDemandedBits isn't always able to simplify it.
      uint64_t Val = Op.getConstantOperandVal(0);
      if (isPowerOf2_64(Val)) {
        uint64_t Log2 = Log2_64(Val);
        if (Log2 < 3)
          return DAG.getNode(ISD::SRL, DL, VT, VLENB,
                             DAG.getConstant(3 - Log2, DL, VT));
        if (Log2 > 3)
          return DAG.getNode(ISD::SHL, DL, VT, VLENB,
                             DAG.getConstant(Log2 - 3, DL, VT));
        return VLENB;
      }
      // If the multiplier is a multiple of 8, scale it down to avoid needing
      // to shift the VLENB value.
      if ((Val % 8) == 0)
        return DAG.getNode(ISD::MUL, DL, VT, VLENB,
                           DAG.getConstant(Val / 8, DL, VT));
    }

    // General case: vscale = (VLENB >> 3) * multiplier.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FPOWI: {
    // Custom promote f16 powi with illegal i32 integer type on RV64. Once
    // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
    if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
        Op.getOperand(1).getValueType() == MVT::i32) {
      SDLoc DL(Op);
      SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue Powi =
          DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
      return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
                         DAG.getIntPtrConstant(0, DL));
    }
    return SDValue();
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size as the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    // f16->f64: extend in two hops via f32.
    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size as the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    // f64->f16: round-to-odd to f32 first, then round to f16.
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
                                          DL, DAG, Subtarget);

    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Round, DAG, Subtarget);
    return Round;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::FP_TO_SINT_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::FP_TO_UINT_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT, SrcContainerVT;
    // Derive the reference container type from the larger vector type.
    if (SrcEltSize > EltSize) {
      SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    } else {
      ContainerVT = getContainerForFixedLengthVector(VT);
      SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
    return convertFromScalableVector(VT, Src, DAG, Subtarget);
  }
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
  case ISD::FTRUNC:
  case ISD::FCEIL:
  case ISD::FFLOOR:
    return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    // Mask-vector (i1) reductions take a dedicated path.
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAX:
    return lowerVPREDUCE(Op, DAG);
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
    // Note: for VP reductions the vector operand is operand 1 (0 is the
    // start value).
    if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
    return lowerVPREDUCE(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::STEP_VECTOR:
    return lowerSTEP_VECTOR(Op, DAG);
  case ISD::VECTOR_REVERSE:
    return lowerVECTOR_REVERSE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::SPLAT_VECTOR:
    if (Op.getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskSplat(Op, DAG);
    return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::CONCAT_VECTORS: {
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
      if (SubVec.isUndef())
        continue;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    }
    return Vec;
  }
  case ISD::LOAD:
    // Unaligned RVV loads are legalized via an equally-sized byte vector.
    if (auto V = expandUnalignedRVVLoad(Op, DAG))
      return V;
    if (Op.getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorLoadToRVV(Op, DAG);
    return Op;
  case ISD::STORE:
    // Unaligned RVV stores are legalized via an equally-sized byte vector.
    if (auto V = expandUnalignedRVVStore(Op, DAG))
      return V;
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return Op;
  case ISD::MLOAD:
  case ISD::VP_LOAD:
    return lowerMaskedLoad(Op, DAG);
  case ISD::MSTORE:
  case ISD::VP_STORE:
    return lowerMaskedStore(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerFixedLengthVectorShiftToRVV(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    return SDValue();
  case ISD::SADDSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
  case ISD::UADDSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
  case ISD::SSUBSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
  case ISD::USUBSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::FMINNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
  case ISD::FMAXNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
  case ISD::ABS:
    return lowerABS(Op, DAG);
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    return lowerMaskedGather(Op, DAG);
  case ISD::MSCATTER:
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return lowerSET_ROUNDING(Op, DAG);
  case ISD::VP_SELECT:
    return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
  case ISD::VP_ADD:
    return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::VP_SUB:
    return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::VP_MUL:
    return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::VP_SDIV:
    return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::VP_UDIV:
    return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::VP_SREM:
    return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::VP_UREM:
    return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::VP_AND:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
  case ISD::VP_OR:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
  case ISD::VP_XOR:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
  case ISD::VP_ASHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::VP_LSHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::VP_SHL:
    return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::VP_FADD:
    return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::VP_FSUB:
    return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::VP_FMUL:
    return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::VP_FDIV:
    return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
  }
}
3453 
3454 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3455                              SelectionDAG &DAG, unsigned Flags) {
3456   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3457 }
3458 
3459 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3460                              SelectionDAG &DAG, unsigned Flags) {
3461   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3462                                    Flags);
3463 }
3464 
3465 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3466                              SelectionDAG &DAG, unsigned Flags) {
3467   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3468                                    N->getOffset(), Flags);
3469 }
3470 
3471 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3472                              SelectionDAG &DAG, unsigned Flags) {
3473   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3474 }
3475 
// Return the address of symbol-bearing node \p N (a global address, block
// address, constant pool entry or jump table), choosing the materialisation
// sequence based on the relocation model and code model. When position
// independent, \p IsLocal selects direct PC-relative addressing (PseudoLLA)
// over a GOT-indirect load (PseudoLA).
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
3516 
3517 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3518                                                 SelectionDAG &DAG) const {
3519   SDLoc DL(Op);
3520   EVT Ty = Op.getValueType();
3521   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3522   int64_t Offset = N->getOffset();
3523   MVT XLenVT = Subtarget.getXLenVT();
3524 
3525   const GlobalValue *GV = N->getGlobal();
3526   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3527   SDValue Addr = getAddr(N, DAG, IsLocal);
3528 
3529   // In order to maximise the opportunity for common subexpression elimination,
3530   // emit a separate ADD node for the global address offset instead of folding
3531   // it in the global address node. Later peephole optimisations may choose to
3532   // fold it back in when profitable.
3533   if (Offset != 0)
3534     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3535                        DAG.getConstant(Offset, DL, XLenVT));
3536   return Addr;
3537 }
3538 
3539 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3540                                                SelectionDAG &DAG) const {
3541   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3542 
3543   return getAddr(N, DAG);
3544 }
3545 
3546 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3547                                                SelectionDAG &DAG) const {
3548   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3549 
3550   return getAddr(N, DAG);
3551 }
3552 
3553 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3554                                             SelectionDAG &DAG) const {
3555   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3556 
3557   return getAddr(N, DAG);
3558 }
3559 
// Lower a thread-local global under the static TLS models: Initial-Exec when
// \p UseGOT is true, Local-Exec otherwise. Returns the variable's absolute
// address, i.e. the thread pointer (tp/X4) plus the symbol's TLS offset.
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // Three target nodes reference the same symbol with different relocation
  // flags so the linker can relax/resolve each piece of the addend.
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
3600 
// Lower a thread-local global under the General/Local-Dynamic TLS models by
// computing the GOT slot address and calling __tls_get_addr on it. Returns
// the call's result, which is the variable's address.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // first is the call result; the chain is dropped here.
  return LowerCallTo(CLI).first;
}
3632 
3633 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3634                                                    SelectionDAG &DAG) const {
3635   SDLoc DL(Op);
3636   EVT Ty = Op.getValueType();
3637   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3638   int64_t Offset = N->getOffset();
3639   MVT XLenVT = Subtarget.getXLenVT();
3640 
3641   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3642 
3643   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3644       CallingConv::GHC)
3645     report_fatal_error("In GHC calling convention TLS is not supported");
3646 
3647   SDValue Addr;
3648   switch (Model) {
3649   case TLSModel::LocalExec:
3650     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3651     break;
3652   case TLSModel::InitialExec:
3653     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3654     break;
3655   case TLSModel::LocalDynamic:
3656   case TLSModel::GeneralDynamic:
3657     Addr = getDynamicTLSAddr(N, DAG);
3658     break;
3659   }
3660 
3661   // In order to maximise the opportunity for common subexpression elimination,
3662   // emit a separate ADD node for the global address offset instead of folding
3663   // it in the global address node. Later peephole optimisations may choose to
3664   // fold it back in when profitable.
3665   if (Offset != 0)
3666     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3667                        DAG.getConstant(Offset, DL, XLenVT));
3668   return Addr;
3669 }
3670 
// Lower ISD::SELECT. Vector selects become VSELECT on a splatted condition;
// scalar selects become RISCVISD::SELECT_CC, folding a feeding SETCC into
// the node when possible so the integer compare+branch instructions apply.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = VT.isScalableVector()
                            ? DAG.getSplatVector(SplatCondVT, DL, CondV)
                            : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to SETLT
    // case for now because that is what signed saturating add/sub need.
    // FIXME: We don't need the condition to be SETLT or even a SETCC,
    // but we would probably want to swap the true/false values if the condition
    // is SETGE/SETLE to avoid an XORI.
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
        CCVal == ISD::SETLT) {
      const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
      const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
      // The setcc result is 0 or 1, so the select collapses to an add or
      // subtract of the condition from the false value.
      if (TrueVal - 1 == FalseVal)
        return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
      if (TrueVal + 1 == FalseVal)
        return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
    }

    // Canonicalise the comparison into a form with a matching RISCV branch.
    // May swap/adjust LHS, RHS and CCVal in place.
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getCondCode(ISD::SETNE);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
3735 
3736 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3737   SDValue CondV = Op.getOperand(1);
3738   SDLoc DL(Op);
3739   MVT XLenVT = Subtarget.getXLenVT();
3740 
3741   if (CondV.getOpcode() == ISD::SETCC &&
3742       CondV.getOperand(0).getValueType() == XLenVT) {
3743     SDValue LHS = CondV.getOperand(0);
3744     SDValue RHS = CondV.getOperand(1);
3745     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3746 
3747     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3748 
3749     SDValue TargetCC = DAG.getCondCode(CCVal);
3750     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3751                        LHS, RHS, TargetCC, Op.getOperand(2));
3752   }
3753 
3754   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3755                      CondV, DAG.getConstant(0, DL, XLenVT),
3756                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3757 }
3758 
3759 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3760   MachineFunction &MF = DAG.getMachineFunction();
3761   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3762 
3763   SDLoc DL(Op);
3764   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3765                                  getPointerTy(MF.getDataLayout()));
3766 
3767   // vastart just stores the address of the VarArgsFrameIndex slot into the
3768   // memory location argument.
3769   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3770   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3771                       MachinePointerInfo(SV));
3772 }
3773 
3774 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3775                                             SelectionDAG &DAG) const {
3776   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3777   MachineFunction &MF = DAG.getMachineFunction();
3778   MachineFrameInfo &MFI = MF.getFrameInfo();
3779   MFI.setFrameAddressIsTaken(true);
3780   Register FrameReg = RI.getFrameRegister(MF);
3781   int XLenInBytes = Subtarget.getXLen() / 8;
3782 
3783   EVT VT = Op.getValueType();
3784   SDLoc DL(Op);
3785   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3786   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3787   while (Depth--) {
3788     int Offset = -(XLenInBytes * 2);
3789     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3790                               DAG.getIntPtrConstant(Offset, DL));
3791     FrameAddr =
3792         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3793   }
3794   return FrameAddr;
3795 }
3796 
3797 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3798                                              SelectionDAG &DAG) const {
3799   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3800   MachineFunction &MF = DAG.getMachineFunction();
3801   MachineFrameInfo &MFI = MF.getFrameInfo();
3802   MFI.setReturnAddressIsTaken(true);
3803   MVT XLenVT = Subtarget.getXLenVT();
3804   int XLenInBytes = Subtarget.getXLen() / 8;
3805 
3806   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3807     return SDValue();
3808 
3809   EVT VT = Op.getValueType();
3810   SDLoc DL(Op);
3811   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3812   if (Depth) {
3813     int Off = -XLenInBytes;
3814     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3815     SDValue Offset = DAG.getConstant(Off, DL, VT);
3816     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3817                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3818                        MachinePointerInfo());
3819   }
3820 
3821   // Return the value of the return address register, marking it an implicit
3822   // live-in.
3823   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3824   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3825 }
3826 
3827 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3828                                                  SelectionDAG &DAG) const {
3829   SDLoc DL(Op);
3830   SDValue Lo = Op.getOperand(0);
3831   SDValue Hi = Op.getOperand(1);
3832   SDValue Shamt = Op.getOperand(2);
3833   EVT VT = Lo.getValueType();
3834 
3835   // if Shamt-XLEN < 0: // Shamt < XLEN
3836   //   Lo = Lo << Shamt
3837   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3838   // else:
3839   //   Lo = 0
3840   //   Hi = Lo << (Shamt-XLEN)
3841 
3842   SDValue Zero = DAG.getConstant(0, DL, VT);
3843   SDValue One = DAG.getConstant(1, DL, VT);
3844   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3845   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3846   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3847   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3848 
3849   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3850   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3851   SDValue ShiftRightLo =
3852       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3853   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3854   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3855   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3856 
3857   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3858 
3859   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3860   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3861 
3862   SDValue Parts[2] = {Lo, Hi};
3863   return DAG.getMergeValues(Parts, DL);
3864 }
3865 
3866 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3867                                                   bool IsSRA) const {
3868   SDLoc DL(Op);
3869   SDValue Lo = Op.getOperand(0);
3870   SDValue Hi = Op.getOperand(1);
3871   SDValue Shamt = Op.getOperand(2);
3872   EVT VT = Lo.getValueType();
3873 
3874   // SRA expansion:
3875   //   if Shamt-XLEN < 0: // Shamt < XLEN
3876   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3877   //     Hi = Hi >>s Shamt
3878   //   else:
3879   //     Lo = Hi >>s (Shamt-XLEN);
3880   //     Hi = Hi >>s (XLEN-1)
3881   //
3882   // SRL expansion:
3883   //   if Shamt-XLEN < 0: // Shamt < XLEN
3884   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3885   //     Hi = Hi >>u Shamt
3886   //   else:
3887   //     Lo = Hi >>u (Shamt-XLEN);
3888   //     Hi = 0;
3889 
3890   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3891 
3892   SDValue Zero = DAG.getConstant(0, DL, VT);
3893   SDValue One = DAG.getConstant(1, DL, VT);
3894   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3895   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3896   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3897   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3898 
3899   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3900   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3901   SDValue ShiftLeftHi =
3902       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3903   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3904   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3905   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3906   SDValue HiFalse =
3907       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3908 
3909   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3910 
3911   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3912   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3913 
3914   SDValue Parts[2] = {Lo, Hi};
3915   return DAG.getMergeValues(Parts, DL);
3916 }
3917 
// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Constant all-ones/all-zeros splats map directly to VMSET/VMCLR.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT XLenVT = Subtarget.getXLenVT();
  assert(SplatVal.getValueType() == XLenVT &&
         "Unexpected type for i1 splat value");
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  // Only bit 0 of the scalar carries the i1 payload; mask the rest off.
  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
                         DAG.getConstant(1, DL, XLenVT));
  // Splat the masked value as i8 and compare against zero to form the mask.
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
3944 
3945 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3946 // illegal (currently only vXi64 RV32).
3947 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3948 // them to SPLAT_VECTOR_I64
3949 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3950                                                      SelectionDAG &DAG) const {
3951   SDLoc DL(Op);
3952   MVT VecVT = Op.getSimpleValueType();
3953   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3954          "Unexpected SPLAT_VECTOR_PARTS lowering");
3955 
3956   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3957   SDValue Lo = Op.getOperand(0);
3958   SDValue Hi = Op.getOperand(1);
3959 
3960   if (VecVT.isFixedLengthVector()) {
3961     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3962     SDLoc DL(Op);
3963     SDValue Mask, VL;
3964     std::tie(Mask, VL) =
3965         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3966 
3967     SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3968     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3969   }
3970 
3971   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3972     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3973     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3974     // If Hi constant is all the same sign bit as Lo, lower this as a custom
3975     // node in order to try and match RVV vector/scalar instructions.
3976     if ((LoC >> 31) == HiC)
3977       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3978   }
3979 
3980   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3981   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3982       isa<ConstantSDNode>(Hi.getOperand(1)) &&
3983       Hi.getConstantOperandVal(1) == 31)
3984     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3985 
3986   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
3987   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3988                      DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
3989 }
3990 
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
// ExtTrueVal is the element value selected for set mask bits (1 or -1).
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length path: operate on the scalable container type with the
  // VL-predicated select, then convert back.
  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
4045 
// Custom-lower an extension of a fixed-length vector by performing the
// VL-predicated extension node ExtendOpc (supplied by the caller) on the
// canonical scalable container types, then converting back.
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}
4073 
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
// Scalable inputs take the generic AND/SETCC path; fixed-length inputs use
// the VL-predicated AND_VL/SETCC_VL nodes on the scalable container type.
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  // Splat the scalar constants 1 and 0 across the (container) vector type.
  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  if (VecVT.isScalableVector()) {
    // Scalable path: keep only bit 0 of each element, then compare non-zero.
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
4115 
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // On RV32 an i64 element value cannot be moved into a vector with a single
  // scalar move; detect that case here.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // ValInVec holds the value at element 0 of a vector, ready to be slid up.
  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      // Inserting at element 0 needs no slide; the scalar move suffices.
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // VL is Idx+1 so only the destination lane (at VL-1) is written.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
4214 
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and extract from that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  // Integer extract: read element 0 into a scalar register at XLenVT, then
  // truncate to the (possibly narrower) requested element type.
  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
4262 
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
//
// Rewrites the scalar ("splat") operand of an RVV intrinsic so that isel sees
// a legal type: operands narrower than XLen are extended to XLenVT, and i64
// operands on RV32 are either truncated (sign-extended constants) or turned
// into an explicit splat vector. Returns SDValue() when no rewriting applies.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  // With a chain, operand 0 is the chain and operand 1 the intrinsic ID.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasSplatOperand())
    return SDValue();

  // Translate the table's splat-operand index into an SDNode operand index
  // (skip the intrinsic ID and, if present, the chain).
  unsigned SplatOp = II->SplatOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->SplatOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32), DL, MVT::i32;
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  SDValue VL = Op.getOperand(II->VLOperand + 1 + HasChain);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
4337 
4338 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4339                                                      SelectionDAG &DAG) const {
4340   unsigned IntNo = Op.getConstantOperandVal(0);
4341   SDLoc DL(Op);
4342   MVT XLenVT = Subtarget.getXLenVT();
4343 
4344   switch (IntNo) {
4345   default:
4346     break; // Don't custom lower most intrinsics.
4347   case Intrinsic::thread_pointer: {
4348     EVT PtrVT = getPointerTy(DAG.getDataLayout());
4349     return DAG.getRegister(RISCV::X4, PtrVT);
4350   }
4351   case Intrinsic::riscv_orc_b:
4352     // Lower to the GORCI encoding for orc.b.
4353     return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
4354                        DAG.getConstant(7, DL, XLenVT));
4355   case Intrinsic::riscv_grev:
4356   case Intrinsic::riscv_gorc: {
4357     unsigned Opc =
4358         IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
4359     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4360   }
4361   case Intrinsic::riscv_shfl:
4362   case Intrinsic::riscv_unshfl: {
4363     unsigned Opc =
4364         IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4365     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4366   }
4367   case Intrinsic::riscv_bcompress:
4368   case Intrinsic::riscv_bdecompress: {
4369     unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
4370                                                        : RISCVISD::BDECOMPRESS;
4371     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4372   }
4373   case Intrinsic::riscv_bfp:
4374     return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
4375                        Op.getOperand(2));
4376   case Intrinsic::riscv_fsl:
4377     return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
4378                        Op.getOperand(2), Op.getOperand(3));
4379   case Intrinsic::riscv_fsr:
4380     return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
4381                        Op.getOperand(2), Op.getOperand(3));
4382   case Intrinsic::riscv_vmv_x_s:
4383     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
4384     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
4385                        Op.getOperand(1));
4386   case Intrinsic::riscv_vmv_v_x:
4387     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
4388                             Op.getSimpleValueType(), DL, DAG, Subtarget);
4389   case Intrinsic::riscv_vfmv_v_f:
4390     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
4391                        Op.getOperand(1), Op.getOperand(2));
4392   case Intrinsic::riscv_vmv_s_x: {
4393     SDValue Scalar = Op.getOperand(2);
4394 
4395     if (Scalar.getValueType().bitsLE(XLenVT)) {
4396       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
4397       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
4398                          Op.getOperand(1), Scalar, Op.getOperand(3));
4399     }
4400 
4401     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
4402 
4403     // This is an i64 value that lives in two scalar registers. We have to
4404     // insert this in a convoluted way. First we build vXi64 splat containing
4405     // the/ two values that we assemble using some bit math. Next we'll use
4406     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
4407     // to merge element 0 from our splat into the source vector.
4408     // FIXME: This is probably not the best way to do this, but it is
4409     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
4410     // point.
4411     //   sw lo, (a0)
4412     //   sw hi, 4(a0)
4413     //   vlse vX, (a0)
4414     //
4415     //   vid.v      vVid
4416     //   vmseq.vx   mMask, vVid, 0
4417     //   vmerge.vvm vDest, vSrc, vVal, mMask
4418     MVT VT = Op.getSimpleValueType();
4419     SDValue Vec = Op.getOperand(1);
4420     SDValue VL = Op.getOperand(3);
4421 
4422     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
4423     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
4424                                       DAG.getConstant(0, DL, MVT::i32), VL);
4425 
4426     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
4427     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4428     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
4429     SDValue SelectCond =
4430         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
4431                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
4432     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
4433                        Vec, VL);
4434   }
4435   case Intrinsic::riscv_vslide1up:
4436   case Intrinsic::riscv_vslide1down:
4437   case Intrinsic::riscv_vslide1up_mask:
4438   case Intrinsic::riscv_vslide1down_mask: {
4439     // We need to special case these when the scalar is larger than XLen.
4440     unsigned NumOps = Op.getNumOperands();
4441     bool IsMasked = NumOps == 7;
4442     unsigned OpOffset = IsMasked ? 1 : 0;
4443     SDValue Scalar = Op.getOperand(2 + OpOffset);
4444     if (Scalar.getValueType().bitsLE(XLenVT))
4445       break;
4446 
4447     // Splatting a sign extended constant is fine.
4448     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
4449       if (isInt<32>(CVal->getSExtValue()))
4450         break;
4451 
4452     MVT VT = Op.getSimpleValueType();
4453     assert(VT.getVectorElementType() == MVT::i64 &&
4454            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
4455 
4456     // Convert the vector source to the equivalent nxvXi32 vector.
4457     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4458     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
4459 
4460     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4461                                    DAG.getConstant(0, DL, XLenVT));
4462     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
4463                                    DAG.getConstant(1, DL, XLenVT));
4464 
4465     // Double the VL since we halved SEW.
4466     SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
4467     SDValue I32VL =
4468         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
4469 
4470     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
4471     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
4472 
4473     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
4474     // instructions.
4475     if (IntNo == Intrinsic::riscv_vslide1up ||
4476         IntNo == Intrinsic::riscv_vslide1up_mask) {
4477       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
4478                         I32Mask, I32VL);
4479       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
4480                         I32Mask, I32VL);
4481     } else {
4482       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
4483                         I32Mask, I32VL);
4484       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
4485                         I32Mask, I32VL);
4486     }
4487 
4488     // Convert back to nxvXi64.
4489     Vec = DAG.getBitcast(VT, Vec);
4490 
4491     if (!IsMasked)
4492       return Vec;
4493 
4494     // Apply mask after the operation.
4495     SDValue Mask = Op.getOperand(NumOps - 3);
4496     SDValue MaskedOff = Op.getOperand(1);
4497     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
4498   }
4499   }
4500 
4501   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4502 }
4503 
// Custom lowering for INTRINSIC_W_CHAIN. Expands the fixed-length masked
// strided load intrinsic into the corresponding scalable vlse/vlse_mask
// memory intrinsic; anything else falls through to
// lowerVectorIntrinsicSplats.
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    // The masked form additionally needs the mask and pass-thru converted to
    // the scalable container types.
    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }

    // VL is the exact number of elements in the original fixed-length type.
    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
        XLenVT);

    // Assemble the vlse operand list; the masked variant carries the
    // pass-thru, mask, and a tail-policy operand in addition.
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
    if (!IsUnmasked)
      Ops.push_back(PassThru);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);
    if (!IsUnmasked) {
      SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
      Ops.push_back(Policy);
    }

    SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SDValue Chain = Result.getValue(1);
    // Convert the scalable result back to the original fixed-length type.
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  }

  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
4562 
// Custom lowering for INTRINSIC_VOID. Expands the fixed-length masked strided
// store intrinsic into the corresponding scalable vsse/vsse_mask memory
// intrinsic; all other intrinsics are left to default handling.
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    // The stored value (and, for the masked form, the mask) must be converted
    // to the scalable container types.
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    // VL is the exact number of elements in the original fixed-length type.
    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    // Assemble the vsse operand list; the masked variant carries the mask in
    // addition.
    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  }

  return SDValue();
}
4612 
4613 static MVT getLMUL1VT(MVT VT) {
4614   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
4615          "Unexpected vector MVT");
4616   return MVT::getScalableVectorVT(
4617       VT.getVectorElementType(),
4618       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
4619 }
4620 
4621 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
4622   switch (ISDOpcode) {
4623   default:
4624     llvm_unreachable("Unhandled reduction");
4625   case ISD::VECREDUCE_ADD:
4626     return RISCVISD::VECREDUCE_ADD_VL;
4627   case ISD::VECREDUCE_UMAX:
4628     return RISCVISD::VECREDUCE_UMAX_VL;
4629   case ISD::VECREDUCE_SMAX:
4630     return RISCVISD::VECREDUCE_SMAX_VL;
4631   case ISD::VECREDUCE_UMIN:
4632     return RISCVISD::VECREDUCE_UMIN_VL;
4633   case ISD::VECREDUCE_SMIN:
4634     return RISCVISD::VECREDUCE_SMIN_VL;
4635   case ISD::VECREDUCE_AND:
4636     return RISCVISD::VECREDUCE_AND_VL;
4637   case ISD::VECREDUCE_OR:
4638     return RISCVISD::VECREDUCE_OR_VL;
4639   case ISD::VECREDUCE_XOR:
4640     return RISCVISD::VECREDUCE_XOR_VL;
4641   }
4642 }
4643 
// Lower AND/OR/XOR reductions of i1 vectors (both the plain VECREDUCE and the
// VP_REDUCE forms) via vcpop:
//   AND -> (vcpop ~x) == 0, OR -> (vcpop x) != 0, XOR -> ((vcpop x) & 1) != 0
// For VP reductions the start value is folded in with a final scalar op.
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  // VP reductions carry the start value in operand 0, the vector in 1.
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();
  assert(Op.getValueType() == XLenVT &&
         "Expected reduction output to be legalized to XLenVT");

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // VP reductions supply their own mask and EVL; otherwise use the defaults.
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  unsigned BaseOpc;
  ISD::CondCode CC;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
4726 
// Lower an integer VECREDUCE_* node to an RVV reduction. The reduction is
// seeded with a splat of the operation's neutral element in an LMUL=1 vector
// and the scalar result is read back from element 0 of the LMUL=1 result.
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
    // Combine the halves with the scalar base op; reduce the narrower result.
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(ContainerVT);
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Splat the neutral element into element 0 (VL=1) of an LMUL=1 vector to
  // seed the reduction.
  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  SDValue IdentitySplat = lowerScalarSplat(
      NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
                                  IdentitySplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, XLenVT));
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
4776 
4777 // Given a reduction op, this function returns the matching reduction opcode,
4778 // the vector SDValue and the scalar SDValue required to lower this to a
4779 // RISCVISD node.
4780 static std::tuple<unsigned, SDValue, SDValue>
4781 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
4782   SDLoc DL(Op);
4783   auto Flags = Op->getFlags();
4784   unsigned Opcode = Op.getOpcode();
4785   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
4786   switch (Opcode) {
4787   default:
4788     llvm_unreachable("Unhandled reduction");
4789   case ISD::VECREDUCE_FADD: {
4790     // Use positive zero if we can. It is cheaper to materialize.
4791     SDValue Zero =
4792         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
4793     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
4794   }
4795   case ISD::VECREDUCE_SEQ_FADD:
4796     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
4797                            Op.getOperand(0));
4798   case ISD::VECREDUCE_FMIN:
4799     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
4800                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4801   case ISD::VECREDUCE_FMAX:
4802     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
4803                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4804   }
4805 }
4806 
// Lower a floating-point VECREDUCE_* node to an RVV reduction: splat the
// scalar start value into element 0 (VL=1) of an LMUL=1 vector, perform the
// reduction, and extract element 0 of the result.
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Seed the reduction with the scalar start value in element 0 (VL=1).
  SDValue ScalarSplat = lowerScalarSplat(
      ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
                                  VectorVal, ScalarSplat, Mask, VL);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
4837 
4838 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
4839   switch (ISDOpcode) {
4840   default:
4841     llvm_unreachable("Unhandled reduction");
4842   case ISD::VP_REDUCE_ADD:
4843     return RISCVISD::VECREDUCE_ADD_VL;
4844   case ISD::VP_REDUCE_UMAX:
4845     return RISCVISD::VECREDUCE_UMAX_VL;
4846   case ISD::VP_REDUCE_SMAX:
4847     return RISCVISD::VECREDUCE_SMAX_VL;
4848   case ISD::VP_REDUCE_UMIN:
4849     return RISCVISD::VECREDUCE_UMIN_VL;
4850   case ISD::VP_REDUCE_SMIN:
4851     return RISCVISD::VECREDUCE_SMIN_VL;
4852   case ISD::VP_REDUCE_AND:
4853     return RISCVISD::VECREDUCE_AND_VL;
4854   case ISD::VP_REDUCE_OR:
4855     return RISCVISD::VECREDUCE_OR_VL;
4856   case ISD::VP_REDUCE_XOR:
4857     return RISCVISD::VECREDUCE_XOR_VL;
4858   case ISD::VP_REDUCE_FADD:
4859     return RISCVISD::VECREDUCE_FADD_VL;
4860   case ISD::VP_REDUCE_SEQ_FADD:
4861     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
4862   case ISD::VP_REDUCE_FMAX:
4863     return RISCVISD::VECREDUCE_FMAX_VL;
4864   case ISD::VP_REDUCE_FMIN:
4865     return RISCVISD::VECREDUCE_FMIN_VL;
4866   }
4867 }
4868 
// Lower a VP_REDUCE_* node to an RVV reduction, using the node's own mask and
// EVL. The start value (operand 0) is splatted into an LMUL=1 vector and used
// both as the reduction seed and as the merge/passthru operand.
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);

  MVT M1VT = getLMUL1VT(ContainerVT);
  MVT XLenVT = Subtarget.getXLenVT();
  // Integer elements narrower than XLen are extracted at XLenVT and then
  // sign-extended/truncated to the result type below.
  MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;

  SDValue StartSplat =
      lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
                       DL, DAG, Subtarget);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                             DAG.getConstant(0, DL, XLenVT));
  if (!VecVT.isInteger())
    return Elt0;
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
4908 
// Lower ISD::INSERT_SUBVECTOR. Inserts which align to a vector register
// boundary resolve to subregister manipulation; all others are lowered to a
// VSLIDEUP of the subvector into place, going via an LMUL=1 type where
// possible to keep register pressure down.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as i8 vectors of 1/8th the element count,
      // scaling the insertion index to match.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back down to i1 by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // Inserting at index 0 into an undef scalable vector is a no-op cast.
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // A fractional-LMUL subvector does not fill a whole vector register, so an
  // aligned insert of one still disturbs the remainder of that register.
  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
5062 
// Lower ISD::EXTRACT_SUBVECTOR. Extracts which align to a vector register
// boundary resolve to subregister manipulation; all others are lowered to a
// VSLIDEDOWN which brings the desired subvector to element 0, followed by a
// cast-like index-0 extract.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret the source and result as i8 vectors of 1/8th the element
      // count, scaling the extraction index to match.
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Truncate back down to i1 by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
5186 
5187 // Lower step_vector to the vid instruction. Any non-identity step value must
5188 // be accounted for my manual expansion.
5189 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
5190                                               SelectionDAG &DAG) const {
5191   SDLoc DL(Op);
5192   MVT VT = Op.getSimpleValueType();
5193   MVT XLenVT = Subtarget.getXLenVT();
5194   SDValue Mask, VL;
5195   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
5196   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5197   uint64_t StepValImm = Op.getConstantOperandVal(0);
5198   if (StepValImm != 1) {
5199     if (isPowerOf2_64(StepValImm)) {
5200       SDValue StepVal =
5201           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
5202                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
5203       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
5204     } else {
5205       SDValue StepVal = lowerScalarSplat(
5206           DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
5207           DL, DAG, Subtarget);
5208       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
5209     }
5210   }
5211   return StepVec;
5212 }
5213 
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // Compute an upper bound on VLMAX from the target's maximum VLEN, if known.
  // A value of 0 means VLMAX is unbounded from the compiler's point of view.
  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  unsigned MinElts = VecVT.getVectorMinNumElements();
  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                              DAG.getConstant(MinElts, DL, XLenVT));
  SDValue VLMinus1 =
      DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32. On RV32 an i64
  // element can't be splatted from a single GPR, so a dedicated node is used.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);

  // Indices = (VLMAX-1) - (0, 1, ..., VLMAX-1) = (VLMAX-1, ..., 1, 0).
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices =
      DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
5291 
5292 SDValue
5293 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
5294                                                      SelectionDAG &DAG) const {
5295   SDLoc DL(Op);
5296   auto *Load = cast<LoadSDNode>(Op);
5297 
5298   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5299                                         Load->getMemoryVT(),
5300                                         *Load->getMemOperand()) &&
5301          "Expecting a correctly-aligned load");
5302 
5303   MVT VT = Op.getSimpleValueType();
5304   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5305 
5306   SDValue VL =
5307       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5308 
5309   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5310   SDValue NewLoad = DAG.getMemIntrinsicNode(
5311       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
5312       Load->getMemoryVT(), Load->getMemOperand());
5313 
5314   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5315   return DAG.getMergeValues({Result, Load->getChain()}, DL);
5316 }
5317 
5318 SDValue
5319 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
5320                                                       SelectionDAG &DAG) const {
5321   SDLoc DL(Op);
5322   auto *Store = cast<StoreSDNode>(Op);
5323 
5324   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5325                                         Store->getMemoryVT(),
5326                                         *Store->getMemOperand()) &&
5327          "Expecting a correctly-aligned store");
5328 
5329   SDValue StoreVal = Store->getValue();
5330   MVT VT = StoreVal.getSimpleValueType();
5331 
5332   // If the size less than a byte, we need to pad with zeros to make a byte.
5333   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
5334     VT = MVT::v8i1;
5335     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
5336                            DAG.getConstant(0, DL, VT), StoreVal,
5337                            DAG.getIntPtrConstant(0, DL));
5338   }
5339 
5340   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5341 
5342   SDValue VL =
5343       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5344 
5345   SDValue NewValue =
5346       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
5347   return DAG.getMemIntrinsicNode(
5348       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
5349       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
5350       Store->getMemoryVT(), Store->getMemOperand());
5351 }
5352 
// Lower an ISD::MLOAD or ISD::VP_LOAD node to the riscv_vle/riscv_vle_mask
// intrinsics via INTRINSIC_W_CHAIN. An all-ones mask selects the unmasked
// form, which takes no passthru, mask or policy operands.
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  // VP loads carry their own explicit vector length; masked loads leave VL
  // null here and pick up the default VL further down.
  SDValue Mask, PassThru, VL;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  // Fixed-length vectors are operated on inside their scalable container type.
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
  // Operand order must match the riscv_vle(_mask) intrinsic signature:
  // unmasked: (chain, id, ptr, vl)
  // masked:   (chain, id, passthru, ptr, mask, vl, policy)
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (!IsUnmasked)
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
5416 
// Lower an ISD::MSTORE or ISD::VP_STORE node to the riscv_vse/riscv_vse_mask
// intrinsics via INTRINSIC_VOID. An all-ones mask selects the unmasked form,
// which takes no mask operand.
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();
  SDValue Val, Mask, VL;

  // VP stores carry their own explicit vector length; masked stores leave VL
  // null here and pick up the default VL further down.
  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
    Val = VPStore->getValue();
    Mask = VPStore->getMask();
    VL = VPStore->getVectorLength();
  } else {
    const auto *MStore = cast<MaskedStoreSDNode>(Op);
    Val = MStore->getValue();
    Mask = MStore->getMask();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Fixed-length vectors are operated on inside their scalable container type.
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
  // Operand order must match the riscv_vse(_mask) intrinsic signature:
  // (chain, id, value, ptr [, mask], vl).
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
5470 
5471 SDValue
5472 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
5473                                                       SelectionDAG &DAG) const {
5474   MVT InVT = Op.getOperand(0).getSimpleValueType();
5475   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
5476 
5477   MVT VT = Op.getSimpleValueType();
5478 
5479   SDValue Op1 =
5480       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
5481   SDValue Op2 =
5482       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5483 
5484   SDLoc DL(Op);
5485   SDValue VL =
5486       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5487 
5488   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5489   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5490 
5491   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
5492                             Op.getOperand(2), Mask, VL);
5493 
5494   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
5495 }
5496 
5497 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
5498     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
5499   MVT VT = Op.getSimpleValueType();
5500 
5501   if (VT.getVectorElementType() == MVT::i1)
5502     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
5503 
5504   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
5505 }
5506 
5507 SDValue
5508 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
5509                                                       SelectionDAG &DAG) const {
5510   unsigned Opc;
5511   switch (Op.getOpcode()) {
5512   default: llvm_unreachable("Unexpected opcode!");
5513   case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
5514   case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
5515   case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
5516   }
5517 
5518   return lowerToScalableOp(Op, DAG, Opc);
5519 }
5520 
5521 // Lower vector ABS to smax(X, sub(0, X)).
5522 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
5523   SDLoc DL(Op);
5524   MVT VT = Op.getSimpleValueType();
5525   SDValue X = Op.getOperand(0);
5526 
5527   assert(VT.isFixedLengthVector() && "Unexpected type");
5528 
5529   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5530   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5531 
5532   SDValue Mask, VL;
5533   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5534 
5535   SDValue SplatZero =
5536       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
5537                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
5538   SDValue NegX =
5539       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
5540   SDValue Max =
5541       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
5542 
5543   return convertFromScalableVector(VT, Max, DAG, Subtarget);
5544 }
5545 
5546 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
5547     SDValue Op, SelectionDAG &DAG) const {
5548   SDLoc DL(Op);
5549   MVT VT = Op.getSimpleValueType();
5550   SDValue Mag = Op.getOperand(0);
5551   SDValue Sign = Op.getOperand(1);
5552   assert(Mag.getValueType() == Sign.getValueType() &&
5553          "Can only handle COPYSIGN with matching types.");
5554 
5555   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5556   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
5557   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
5558 
5559   SDValue Mask, VL;
5560   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5561 
5562   SDValue CopySign =
5563       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
5564 
5565   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
5566 }
5567 
5568 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
5569     SDValue Op, SelectionDAG &DAG) const {
5570   MVT VT = Op.getSimpleValueType();
5571   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5572 
5573   MVT I1ContainerVT =
5574       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5575 
5576   SDValue CC =
5577       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
5578   SDValue Op1 =
5579       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5580   SDValue Op2 =
5581       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
5582 
5583   SDLoc DL(Op);
5584   SDValue Mask, VL;
5585   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5586 
5587   SDValue Select =
5588       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
5589 
5590   return convertFromScalableVector(VT, Select, DAG, Subtarget);
5591 }
5592 
5593 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
5594                                                unsigned NewOpc,
5595                                                bool HasMask) const {
5596   MVT VT = Op.getSimpleValueType();
5597   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5598 
5599   // Create list of operands by converting existing ones to scalable types.
5600   SmallVector<SDValue, 6> Ops;
5601   for (const SDValue &V : Op->op_values()) {
5602     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5603 
5604     // Pass through non-vector operands.
5605     if (!V.getValueType().isVector()) {
5606       Ops.push_back(V);
5607       continue;
5608     }
5609 
5610     // "cast" fixed length vector to a scalable vector.
5611     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
5612            "Only fixed length vectors are supported!");
5613     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5614   }
5615 
5616   SDLoc DL(Op);
5617   SDValue Mask, VL;
5618   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5619   if (HasMask)
5620     Ops.push_back(Mask);
5621   Ops.push_back(VL);
5622 
5623   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
5624   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
5625 }
5626 
5627 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
5628 // * Operands of each node are assumed to be in the same order.
5629 // * The EVL operand is promoted from i32 to i64 on RV64.
5630 // * Fixed-length vectors are converted to their scalable-vector container
5631 //   types.
5632 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
5633                                        unsigned RISCVISDOpc) const {
5634   SDLoc DL(Op);
5635   MVT VT = Op.getSimpleValueType();
5636   SmallVector<SDValue, 4> Ops;
5637 
5638   for (const auto &OpIdx : enumerate(Op->ops())) {
5639     SDValue V = OpIdx.value();
5640     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5641     // Pass through operands which aren't fixed-length vectors.
5642     if (!V.getValueType().isFixedLengthVector()) {
5643       Ops.push_back(V);
5644       continue;
5645     }
5646     // "cast" fixed length vector to a scalable vector.
5647     MVT OpVT = V.getSimpleValueType();
5648     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
5649     assert(useRVVForFixedLengthVectorVT(OpVT) &&
5650            "Only fixed length vectors are supported!");
5651     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5652   }
5653 
5654   if (!VT.isFixedLengthVector())
5655     return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
5656 
5657   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5658 
5659   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
5660 
5661   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
5662 }
5663 
5664 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
5665                                             unsigned MaskOpc,
5666                                             unsigned VecOpc) const {
5667   MVT VT = Op.getSimpleValueType();
5668   if (VT.getVectorElementType() != MVT::i1)
5669     return lowerVPOp(Op, DAG, VecOpc);
5670 
5671   // It is safe to drop mask parameter as masked-off elements are undef.
5672   SDValue Op1 = Op->getOperand(0);
5673   SDValue Op2 = Op->getOperand(1);
5674   SDValue VL = Op->getOperand(3);
5675 
5676   MVT ContainerVT = VT;
5677   const bool IsFixed = VT.isFixedLengthVector();
5678   if (IsFixed) {
5679     ContainerVT = getContainerForFixedLengthVector(VT);
5680     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5681     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
5682   }
5683 
5684   SDLoc DL(Op);
5685   SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
5686   if (!IsFixed)
5687     return Val;
5688   return convertFromScalableVector(VT, Val, DAG, Subtarget);
5689 }
5690 
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  // Common memory-operand state shared by MGATHER and VP_GATHER.
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  ISD::LoadExtType LoadExtType;
  SDValue Index, Mask, PassThru, VL;

  // Pull node-specific operands out of whichever gather form this is. Only
  // the VP form carries an explicit vector length (VL stays null otherwise).
  if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
    Index = VPGN->getIndex();
    Mask = VPGN->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPGN->getVectorLength();
    // VP doesn't support extending loads.
    LoadExtType = ISD::NON_EXTLOAD;
  } else {
    // Else it must be a MGATHER.
    auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
    Index = MGN->getIndex();
    Mask = MGN->getMask();
    PassThru = MGN->getPassThru();
    LoadExtType = MGN->getExtensionType();
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(LoadExtType == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER/VP_GATHER");
  (void)LoadExtType;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the result and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    if (VT.bitsGE(IndexVT)) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      // Keep the index element type but match the container's element count.
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    // Mask and passthru are only consumed by the masked intrinsic form.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }
  }

  // On RV32, truncate index elements wider than XLEN; the indexed load treats
  // indices as XLEN-sized byte offsets anyway.
  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
      IndexVT = IndexVT.changeVectorElementType(XLenVT);
      Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  // MGATHER has no explicit VL; fall back to the default VL for this type.
  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // Build the vluxei(_mask) intrinsic. Operand order for the masked form:
  // chain, intrinsic-id, passthru, base, index, mask, vl, policy; the
  // unmasked form omits passthru, mask and policy.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (!IsUnmasked)
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  // Convert the scalable result back to the requested fixed-length type.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
5797 
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
// matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Common memory-operand state shared by MSCATTER and VP_SCATTER.
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  bool IsTruncatingStore = false;
  SDValue Index, Mask, Val, VL;

  // Pull node-specific operands out of whichever scatter form this is. Only
  // the VP form carries an explicit vector length (VL stays null otherwise).
  if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
    Index = VPSN->getIndex();
    Mask = VPSN->getMask();
    Val = VPSN->getValue();
    VL = VPSN->getVectorLength();
    // VP doesn't support truncating stores.
    IsTruncatingStore = false;
  } else {
    // Else it must be a MSCATTER.
    auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
    Index = MSN->getIndex();
    Mask = MSN->getMask();
    Val = MSN->getValue();
    IsTruncatingStore = MSN->isTruncatingStore();
  }

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
  (void)IsTruncatingStore;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // We need to use the larger of the value and index type to determine the
    // scalable type to use so we don't increase LMUL for any operand/result.
    if (VT.bitsGE(IndexVT)) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      // Keep the index element type but match the container's element count.
      IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                                 ContainerVT.getVectorElementCount());
    } else {
      IndexVT = getContainerForFixedLengthVector(IndexVT);
      ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     IndexVT.getVectorElementCount());
    }

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    // The mask is only consumed by the masked intrinsic form.
    if (!IsUnmasked) {
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // On RV32, truncate index elements wider than XLEN; the indexed store
  // treats indices as XLEN-sized byte offsets anyway.
  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
      IndexVT = IndexVT.changeVectorElementType(XLenVT);
      Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
  }

  // MSCATTER has no explicit VL; fall back to the default VL for this type.
  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // Build the vsoxei(_mask) intrinsic. Operand order: chain, intrinsic-id,
  // value, base, index, [mask,] vl.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
5893 
5894 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
5895                                                SelectionDAG &DAG) const {
5896   const MVT XLenVT = Subtarget.getXLenVT();
5897   SDLoc DL(Op);
5898   SDValue Chain = Op->getOperand(0);
5899   SDValue SysRegNo = DAG.getTargetConstant(
5900       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5901   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
5902   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
5903 
5904   // Encoding used for rounding mode in RISCV differs from that used in
5905   // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
5906   // table, which consists of a sequence of 4-bit fields, each representing
5907   // corresponding FLT_ROUNDS mode.
5908   static const int Table =
5909       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
5910       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
5911       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
5912       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
5913       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
5914 
5915   SDValue Shift =
5916       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
5917   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5918                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5919   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5920                                DAG.getConstant(7, DL, XLenVT));
5921 
5922   return DAG.getMergeValues({Masked, Chain}, DL);
5923 }
5924 
5925 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
5926                                                SelectionDAG &DAG) const {
5927   const MVT XLenVT = Subtarget.getXLenVT();
5928   SDLoc DL(Op);
5929   SDValue Chain = Op->getOperand(0);
5930   SDValue RMValue = Op->getOperand(1);
5931   SDValue SysRegNo = DAG.getTargetConstant(
5932       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5933 
5934   // Encoding used for rounding mode in RISCV differs from that used in
5935   // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
5936   // a table, which consists of a sequence of 4-bit fields, each representing
5937   // corresponding RISCV mode.
5938   static const unsigned Table =
5939       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
5940       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
5941       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
5942       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
5943       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
5944 
5945   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
5946                               DAG.getConstant(2, DL, XLenVT));
5947   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5948                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5949   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5950                         DAG.getConstant(0x7, DL, XLenVT));
5951   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
5952                      RMValue);
5953 }
5954 
5955 static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
5956   switch (IntNo) {
5957   default:
5958     llvm_unreachable("Unexpected Intrinsic");
5959   case Intrinsic::riscv_grev:
5960     return RISCVISD::GREVW;
5961   case Intrinsic::riscv_gorc:
5962     return RISCVISD::GORCW;
5963   case Intrinsic::riscv_bcompress:
5964     return RISCVISD::BCOMPRESSW;
5965   case Intrinsic::riscv_bdecompress:
5966     return RISCVISD::BDECOMPRESSW;
5967   case Intrinsic::riscv_bfp:
5968     return RISCVISD::BFPW;
5969   case Intrinsic::riscv_fsl:
5970     return RISCVISD::FSLW;
5971   case Intrinsic::riscv_fsr:
5972     return RISCVISD::FSRW;
5973   }
5974 }
5975 
5976 // Converts the given intrinsic to a i64 operation with any extension.
5977 static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
5978                                          unsigned IntNo) {
5979   SDLoc DL(N);
5980   RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
5981   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5982   SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5983   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
5984   // ReplaceNodeResults requires we maintain the same type for the return value.
5985   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5986 }
5987 
5988 // Returns the opcode of the target-specific SDNode that implements the 32-bit
5989 // form of the given Opcode.
5990 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
5991   switch (Opcode) {
5992   default:
5993     llvm_unreachable("Unexpected opcode");
5994   case ISD::SHL:
5995     return RISCVISD::SLLW;
5996   case ISD::SRA:
5997     return RISCVISD::SRAW;
5998   case ISD::SRL:
5999     return RISCVISD::SRLW;
6000   case ISD::SDIV:
6001     return RISCVISD::DIVW;
6002   case ISD::UDIV:
6003     return RISCVISD::DIVUW;
6004   case ISD::UREM:
6005     return RISCVISD::REMUW;
6006   case ISD::ROTL:
6007     return RISCVISD::ROLW;
6008   case ISD::ROTR:
6009     return RISCVISD::RORW;
6010   case RISCVISD::GREV:
6011     return RISCVISD::GREVW;
6012   case RISCVISD::GORC:
6013     return RISCVISD::GORCW;
6014   }
6015 }
6016 
6017 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
6018 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
6019 // otherwise be promoted to i64, making it difficult to select the
6020 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of
6021 // type i8/i16/i32 is lost.
6022 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
6023                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
6024   SDLoc DL(N);
6025   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6026   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
6027   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
6028   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6029   // ReplaceNodeResults requires we maintain the same type for the return value.
6030   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6031 }
6032 
6033 // Converts the given 32-bit operation to a i64 operation with signed extension
6034 // semantic to reduce the signed extension instructions.
6035 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
6036   SDLoc DL(N);
6037   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6038   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6039   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
6040   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6041                                DAG.getValueType(MVT::i32));
6042   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
6043 }
6044 
6045 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
6046                                              SmallVectorImpl<SDValue> &Results,
6047                                              SelectionDAG &DAG) const {
6048   SDLoc DL(N);
6049   switch (N->getOpcode()) {
6050   default:
6051     llvm_unreachable("Don't know how to custom type legalize this operation!");
6052   case ISD::STRICT_FP_TO_SINT:
6053   case ISD::STRICT_FP_TO_UINT:
6054   case ISD::FP_TO_SINT:
6055   case ISD::FP_TO_UINT: {
6056     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6057            "Unexpected custom legalisation");
6058     bool IsStrict = N->isStrictFPOpcode();
6059     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
6060                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
6061     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
6062     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
6063         TargetLowering::TypeSoftenFloat) {
6064       if (!isTypeLegal(Op0.getValueType()))
6065         return;
6066       if (IsStrict) {
6067         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
6068                                 : RISCVISD::STRICT_FCVT_WU_RV64;
6069         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
6070         SDValue Res = DAG.getNode(
6071             Opc, DL, VTs, N->getOperand(0), Op0,
6072             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6073         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6074         Results.push_back(Res.getValue(1));
6075         return;
6076       }
6077       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
6078       SDValue Res =
6079           DAG.getNode(Opc, DL, MVT::i64, Op0,
6080                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6081       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6082       return;
6083     }
6084     // If the FP type needs to be softened, emit a library call using the 'si'
6085     // version. If we left it to default legalization we'd end up with 'di'. If
6086     // the FP type doesn't need to be softened just let generic type
6087     // legalization promote the result type.
6088     RTLIB::Libcall LC;
6089     if (IsSigned)
6090       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
6091     else
6092       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
6093     MakeLibCallOptions CallOptions;
6094     EVT OpVT = Op0.getValueType();
6095     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
6096     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
6097     SDValue Result;
6098     std::tie(Result, Chain) =
6099         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
6100     Results.push_back(Result);
6101     if (IsStrict)
6102       Results.push_back(Chain);
6103     break;
6104   }
6105   case ISD::READCYCLECOUNTER: {
6106     assert(!Subtarget.is64Bit() &&
6107            "READCYCLECOUNTER only has custom type legalization on riscv32");
6108 
6109     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
6110     SDValue RCW =
6111         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
6112 
6113     Results.push_back(
6114         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
6115     Results.push_back(RCW.getValue(2));
6116     break;
6117   }
6118   case ISD::MUL: {
6119     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
6120     unsigned XLen = Subtarget.getXLen();
6121     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
6122     if (Size > XLen) {
6123       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
6124       SDValue LHS = N->getOperand(0);
6125       SDValue RHS = N->getOperand(1);
6126       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
6127 
6128       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
6129       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
6130       // We need exactly one side to be unsigned.
6131       if (LHSIsU == RHSIsU)
6132         return;
6133 
6134       auto MakeMULPair = [&](SDValue S, SDValue U) {
6135         MVT XLenVT = Subtarget.getXLenVT();
6136         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
6137         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
6138         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
6139         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
6140         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
6141       };
6142 
6143       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
6144       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
6145 
6146       // The other operand should be signed, but still prefer MULH when
6147       // possible.
6148       if (RHSIsU && LHSIsS && !RHSIsS)
6149         Results.push_back(MakeMULPair(LHS, RHS));
6150       else if (LHSIsU && RHSIsS && !LHSIsS)
6151         Results.push_back(MakeMULPair(RHS, LHS));
6152 
6153       return;
6154     }
6155     LLVM_FALLTHROUGH;
6156   }
6157   case ISD::ADD:
6158   case ISD::SUB:
6159     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6160            "Unexpected custom legalisation");
6161     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
6162     break;
6163   case ISD::SHL:
6164   case ISD::SRA:
6165   case ISD::SRL:
6166     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6167            "Unexpected custom legalisation");
6168     if (N->getOperand(1).getOpcode() != ISD::Constant) {
6169       Results.push_back(customLegalizeToWOp(N, DAG));
6170       break;
6171     }
6172 
6173     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
6174     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
6175     // shift amount.
6176     if (N->getOpcode() == ISD::SHL) {
6177       SDLoc DL(N);
6178       SDValue NewOp0 =
6179           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6180       SDValue NewOp1 =
6181           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
6182       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
6183       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6184                                    DAG.getValueType(MVT::i32));
6185       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6186     }
6187 
6188     break;
6189   case ISD::ROTL:
6190   case ISD::ROTR:
6191     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6192            "Unexpected custom legalisation");
6193     Results.push_back(customLegalizeToWOp(N, DAG));
6194     break;
6195   case ISD::CTTZ:
6196   case ISD::CTTZ_ZERO_UNDEF:
6197   case ISD::CTLZ:
6198   case ISD::CTLZ_ZERO_UNDEF: {
6199     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6200            "Unexpected custom legalisation");
6201 
6202     SDValue NewOp0 =
6203         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6204     bool IsCTZ =
6205         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
6206     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
6207     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
6208     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6209     return;
6210   }
6211   case ISD::SDIV:
6212   case ISD::UDIV:
6213   case ISD::UREM: {
6214     MVT VT = N->getSimpleValueType(0);
6215     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
6216            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
6217            "Unexpected custom legalisation");
6218     // Don't promote division/remainder by constant since we should expand those
6219     // to multiply by magic constant.
6220     // FIXME: What if the expansion is disabled for minsize.
6221     if (N->getOperand(1).getOpcode() == ISD::Constant)
6222       return;
6223 
6224     // If the input is i32, use ANY_EXTEND since the W instructions don't read
6225     // the upper 32 bits. For other types we need to sign or zero extend
6226     // based on the opcode.
6227     unsigned ExtOpc = ISD::ANY_EXTEND;
6228     if (VT != MVT::i32)
6229       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
6230                                            : ISD::ZERO_EXTEND;
6231 
6232     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
6233     break;
6234   }
6235   case ISD::UADDO:
6236   case ISD::USUBO: {
6237     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6238            "Unexpected custom legalisation");
6239     bool IsAdd = N->getOpcode() == ISD::UADDO;
6240     // Create an ADDW or SUBW.
6241     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6242     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6243     SDValue Res =
6244         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
6245     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
6246                       DAG.getValueType(MVT::i32));
6247 
6248     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
6249     // Since the inputs are sign extended from i32, this is equivalent to
6250     // comparing the lower 32 bits.
6251     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6252     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
6253                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
6254 
6255     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6256     Results.push_back(Overflow);
6257     return;
6258   }
6259   case ISD::UADDSAT:
6260   case ISD::USUBSAT: {
6261     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6262            "Unexpected custom legalisation");
6263     if (Subtarget.hasStdExtZbb()) {
6264       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
6265       // sign extend allows overflow of the lower 32 bits to be detected on
6266       // the promoted size.
6267       SDValue LHS =
6268           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6269       SDValue RHS =
6270           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
6271       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
6272       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6273       return;
6274     }
6275 
6276     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
6277     // promotion for UADDO/USUBO.
6278     Results.push_back(expandAddSubSat(N, DAG));
6279     return;
6280   }
6281   case ISD::BITCAST: {
6282     EVT VT = N->getValueType(0);
6283     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
6284     SDValue Op0 = N->getOperand(0);
6285     EVT Op0VT = Op0.getValueType();
6286     MVT XLenVT = Subtarget.getXLenVT();
6287     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
6288       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
6289       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
6290     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
6291                Subtarget.hasStdExtF()) {
6292       SDValue FPConv =
6293           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
6294       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
6295     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
6296                isTypeLegal(Op0VT)) {
6297       // Custom-legalize bitcasts from fixed-length vector types to illegal
6298       // scalar types in order to improve codegen. Bitcast the vector to a
6299       // one-element vector type whose element type is the same as the result
6300       // type, and extract the first element.
6301       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6302       if (isTypeLegal(BVT)) {
6303         SDValue BVec = DAG.getBitcast(BVT, Op0);
6304         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6305                                       DAG.getConstant(0, DL, XLenVT)));
6306       }
6307     }
6308     break;
6309   }
6310   case RISCVISD::GREV:
6311   case RISCVISD::GORC: {
6312     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6313            "Unexpected custom legalisation");
6314     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6315     // This is similar to customLegalizeToWOp, except that we pass the second
6316     // operand (a TargetConstant) straight through: it is already of type
6317     // XLenVT.
6318     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6319     SDValue NewOp0 =
6320         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6321     SDValue NewOp1 =
6322         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6323     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6324     // ReplaceNodeResults requires we maintain the same type for the return
6325     // value.
6326     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6327     break;
6328   }
6329   case RISCVISD::SHFL: {
6330     // There is no SHFLIW instruction, but we can just promote the operation.
6331     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6332            "Unexpected custom legalisation");
6333     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6334     SDValue NewOp0 =
6335         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6336     SDValue NewOp1 =
6337         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6338     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
6339     // ReplaceNodeResults requires we maintain the same type for the return
6340     // value.
6341     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6342     break;
6343   }
6344   case ISD::BSWAP:
6345   case ISD::BITREVERSE: {
6346     MVT VT = N->getSimpleValueType(0);
6347     MVT XLenVT = Subtarget.getXLenVT();
6348     assert((VT == MVT::i8 || VT == MVT::i16 ||
6349             (VT == MVT::i32 && Subtarget.is64Bit())) &&
6350            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
6351     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
6352     unsigned Imm = VT.getSizeInBits() - 1;
6353     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
6354     if (N->getOpcode() == ISD::BSWAP)
6355       Imm &= ~0x7U;
6356     unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
6357     SDValue GREVI =
6358         DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
6359     // ReplaceNodeResults requires we maintain the same type for the return
6360     // value.
6361     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
6362     break;
6363   }
6364   case ISD::FSHL:
6365   case ISD::FSHR: {
6366     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6367            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
6368     SDValue NewOp0 =
6369         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6370     SDValue NewOp1 =
6371         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6372     SDValue NewShAmt =
6373         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6374     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
6375     // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
6376     NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
6377                            DAG.getConstant(0x1f, DL, MVT::i64));
6378     // fshl and fshr concatenate their operands in the same order. fsrw and fslw
6379     // instruction use different orders. fshl will return its first operand for
6380     // shift of zero, fshr will return its second operand. fsl and fsr both
6381     // return rs1 so the ISD nodes need to have different operand orders.
6382     // Shift amount is in rs2.
6383     unsigned Opc = RISCVISD::FSLW;
6384     if (N->getOpcode() == ISD::FSHR) {
6385       std::swap(NewOp0, NewOp1);
6386       Opc = RISCVISD::FSRW;
6387     }
6388     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
6389     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
6390     break;
6391   }
6392   case ISD::EXTRACT_VECTOR_ELT: {
6393     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
6394     // type is illegal (currently only vXi64 RV32).
6395     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
6396     // transferred to the destination register. We issue two of these from the
6397     // upper- and lower- halves of the SEW-bit vector element, slid down to the
6398     // first element.
6399     SDValue Vec = N->getOperand(0);
6400     SDValue Idx = N->getOperand(1);
6401 
6402     // The vector type hasn't been legalized yet so we can't issue target
6403     // specific nodes if it needs legalization.
6404     // FIXME: We would manually legalize if it's important.
6405     if (!isTypeLegal(Vec.getValueType()))
6406       return;
6407 
6408     MVT VecVT = Vec.getSimpleValueType();
6409 
6410     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
6411            VecVT.getVectorElementType() == MVT::i64 &&
6412            "Unexpected EXTRACT_VECTOR_ELT legalization");
6413 
6414     // If this is a fixed vector, we need to convert it to a scalable vector.
6415     MVT ContainerVT = VecVT;
6416     if (VecVT.isFixedLengthVector()) {
6417       ContainerVT = getContainerForFixedLengthVector(VecVT);
6418       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6419     }
6420 
6421     MVT XLenVT = Subtarget.getXLenVT();
6422 
6423     // Use a VL of 1 to avoid processing more elements than we need.
6424     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6425     SDValue VL = DAG.getConstant(1, DL, XLenVT);
6426     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6427 
6428     // Unless the index is known to be 0, we must slide the vector down to get
6429     // the desired element into index 0.
6430     if (!isNullConstant(Idx)) {
6431       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
6432                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
6433     }
6434 
6435     // Extract the lower XLEN bits of the correct vector element.
6436     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6437 
6438     // To extract the upper XLEN bits of the vector element, shift the first
6439     // element right by 32 bits and re-extract the lower XLEN bits.
6440     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6441                                      DAG.getConstant(32, DL, XLenVT), VL);
6442     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
6443                                  ThirtyTwoV, Mask, VL);
6444 
6445     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6446 
6447     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6448     break;
6449   }
6450   case ISD::INTRINSIC_WO_CHAIN: {
6451     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6452     switch (IntNo) {
6453     default:
6454       llvm_unreachable(
6455           "Don't know how to custom type legalize this intrinsic!");
6456     case Intrinsic::riscv_grev:
6457     case Intrinsic::riscv_gorc:
6458     case Intrinsic::riscv_bcompress:
6459     case Intrinsic::riscv_bdecompress:
6460     case Intrinsic::riscv_bfp: {
6461       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6462              "Unexpected custom legalisation");
6463       Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
6464       break;
6465     }
6466     case Intrinsic::riscv_fsl:
6467     case Intrinsic::riscv_fsr: {
6468       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6469              "Unexpected custom legalisation");
6470       SDValue NewOp1 =
6471           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6472       SDValue NewOp2 =
6473           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6474       SDValue NewOp3 =
6475           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
6476       unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
6477       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
6478       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6479       break;
6480     }
6481     case Intrinsic::riscv_orc_b: {
6482       // Lower to the GORCI encoding for orc.b with the operand extended.
6483       SDValue NewOp =
6484           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6485       // If Zbp is enabled, use GORCIW which will sign extend the result.
6486       unsigned Opc =
6487           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
6488       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
6489                                 DAG.getConstant(7, DL, MVT::i64));
6490       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6491       return;
6492     }
6493     case Intrinsic::riscv_shfl:
6494     case Intrinsic::riscv_unshfl: {
6495       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6496              "Unexpected custom legalisation");
6497       SDValue NewOp1 =
6498           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6499       SDValue NewOp2 =
6500           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
6501       unsigned Opc =
6502           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
6503       if (isa<ConstantSDNode>(N->getOperand(2))) {
6504         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
6505                              DAG.getConstant(0xf, DL, MVT::i64));
6506         Opc =
6507             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
6508       }
6509       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
6510       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6511       break;
6512     }
6513     case Intrinsic::riscv_vmv_x_s: {
6514       EVT VT = N->getValueType(0);
6515       MVT XLenVT = Subtarget.getXLenVT();
6516       if (VT.bitsLT(XLenVT)) {
6517         // Simple case just extract using vmv.x.s and truncate.
6518         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
6519                                       Subtarget.getXLenVT(), N->getOperand(1));
6520         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
6521         return;
6522       }
6523 
6524       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
6525              "Unexpected custom legalization");
6526 
6527       // We need to do the move in two steps.
6528       SDValue Vec = N->getOperand(1);
6529       MVT VecVT = Vec.getSimpleValueType();
6530 
6531       // First extract the lower XLEN bits of the element.
6532       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6533 
6534       // To extract the upper XLEN bits of the vector element, shift the first
6535       // element right by 32 bits and re-extract the lower XLEN bits.
6536       SDValue VL = DAG.getConstant(1, DL, XLenVT);
6537       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
6538       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
6539       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
6540                                        DAG.getConstant(32, DL, XLenVT), VL);
6541       SDValue LShr32 =
6542           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
6543       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
6544 
6545       Results.push_back(
6546           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
6547       break;
6548     }
6549     }
6550     break;
6551   }
6552   case ISD::VECREDUCE_ADD:
6553   case ISD::VECREDUCE_AND:
6554   case ISD::VECREDUCE_OR:
6555   case ISD::VECREDUCE_XOR:
6556   case ISD::VECREDUCE_SMAX:
6557   case ISD::VECREDUCE_UMAX:
6558   case ISD::VECREDUCE_SMIN:
6559   case ISD::VECREDUCE_UMIN:
6560     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
6561       Results.push_back(V);
6562     break;
6563   case ISD::VP_REDUCE_ADD:
6564   case ISD::VP_REDUCE_AND:
6565   case ISD::VP_REDUCE_OR:
6566   case ISD::VP_REDUCE_XOR:
6567   case ISD::VP_REDUCE_SMAX:
6568   case ISD::VP_REDUCE_UMAX:
6569   case ISD::VP_REDUCE_SMIN:
6570   case ISD::VP_REDUCE_UMIN:
6571     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
6572       Results.push_back(V);
6573     break;
6574   case ISD::FLT_ROUNDS_: {
6575     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
6576     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
6577     Results.push_back(Res.getValue(0));
6578     Results.push_back(Res.getValue(1));
6579     break;
6580   }
6581   }
6582 }
6583 
6584 // A structure to hold one of the bit-manipulation patterns below. Together, a
6585 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
6586 //   (or (and (shl x, 1), 0xAAAAAAAA),
6587 //       (and (srl x, 1), 0x55555555))
6588 struct RISCVBitmanipPat {
6589   SDValue Op;
6590   unsigned ShAmt;
6591   bool IsSHL;
6592 
6593   bool formsPairWith(const RISCVBitmanipPat &Other) const {
6594     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
6595   }
6596 };
6597 
// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
// BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
// XLen is 64.
// Returns None if the pattern does not match; otherwise returns the matched
// source, shift amount, and shift direction.
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
         "Unexpected number of masks");
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  // After stripping any outer AND, the node must be a constant-amount shift.
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  uint64_t ShAmt = Op.getConstantOperandVal(1);

  // Only power-of-2 shift amounts smaller than the type width can correspond
  // to a bitmanip stage.
  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  // Look up the mask expected for this shift amount, truncated to the type
  // width, and compare against the mask actually present.
  unsigned MaskIdx = Log2_32(ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
6673 
6674 // Matches any of the following bit-manipulation patterns:
6675 //   (and (shl x, 1), (0x55555555 << 1))
6676 //   (and (srl x, 1), 0x55555555)
6677 //   (shl (and x, 0x55555555), 1)
6678 //   (srl (and x, (0x55555555 << 1)), 1)
6679 // where the shift amount and mask may vary thus:
6680 //   [1]  = 0x55555555 / 0xAAAAAAAA
6681 //   [2]  = 0x33333333 / 0xCCCCCCCC
6682 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
6683 //   [8]  = 0x00FF00FF / 0xFF00FF00
6684 //   [16] = 0x0000FFFF / 0xFFFFFFFF
6685 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
6686 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
6687   // These are the unshifted masks which we use to match bit-manipulation
6688   // patterns. They may be shifted left in certain circumstances.
6689   static const uint64_t BitmanipMasks[] = {
6690       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
6691       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
6692 
6693   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6694 }
6695 
6696 // Match the following pattern as a GREVI(W) operation
6697 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
6698 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
6699                                const RISCVSubtarget &Subtarget) {
6700   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6701   EVT VT = Op.getValueType();
6702 
6703   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6704     auto LHS = matchGREVIPat(Op.getOperand(0));
6705     auto RHS = matchGREVIPat(Op.getOperand(1));
6706     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
6707       SDLoc DL(Op);
6708       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
6709                          DAG.getConstant(LHS->ShAmt, DL, VT));
6710     }
6711   }
6712   return SDValue();
6713 }
6714 
6715 // Matches any the following pattern as a GORCI(W) operation
6716 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
6717 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
6718 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
6719 // Note that with the variant of 3.,
6720 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
6721 // the inner pattern will first be matched as GREVI and then the outer
6722 // pattern will be matched to GORC via the first rule above.
6723 // 4.  (or (rotl/rotr x, bitwidth/2), x)
6724 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
6725                                const RISCVSubtarget &Subtarget) {
6726   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6727   EVT VT = Op.getValueType();
6728 
6729   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6730     SDLoc DL(Op);
6731     SDValue Op0 = Op.getOperand(0);
6732     SDValue Op1 = Op.getOperand(1);
6733 
6734     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
6735       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
6736           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
6737           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
6738         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
6739       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
6740       if ((Reverse.getOpcode() == ISD::ROTL ||
6741            Reverse.getOpcode() == ISD::ROTR) &&
6742           Reverse.getOperand(0) == X &&
6743           isa<ConstantSDNode>(Reverse.getOperand(1))) {
6744         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
6745         if (RotAmt == (VT.getSizeInBits() / 2))
6746           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
6747                              DAG.getConstant(RotAmt, DL, VT));
6748       }
6749       return SDValue();
6750     };
6751 
6752     // Check for either commutable permutation of (or (GREVI x, shamt), x)
6753     if (SDValue V = MatchOROfReverse(Op0, Op1))
6754       return V;
6755     if (SDValue V = MatchOROfReverse(Op1, Op0))
6756       return V;
6757 
6758     // OR is commutable so canonicalize its OR operand to the left
6759     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
6760       std::swap(Op0, Op1);
6761     if (Op0.getOpcode() != ISD::OR)
6762       return SDValue();
6763     SDValue OrOp0 = Op0.getOperand(0);
6764     SDValue OrOp1 = Op0.getOperand(1);
6765     auto LHS = matchGREVIPat(OrOp0);
6766     // OR is commutable so swap the operands and try again: x might have been
6767     // on the left
6768     if (!LHS) {
6769       std::swap(OrOp0, OrOp1);
6770       LHS = matchGREVIPat(OrOp0);
6771     }
6772     auto RHS = matchGREVIPat(Op1);
6773     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
6774       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
6775                          DAG.getConstant(LHS->ShAmt, DL, VT));
6776     }
6777   }
6778   return SDValue();
6779 }
6780 
6781 // Matches any of the following bit-manipulation patterns:
6782 //   (and (shl x, 1), (0x22222222 << 1))
6783 //   (and (srl x, 1), 0x22222222)
6784 //   (shl (and x, 0x22222222), 1)
6785 //   (srl (and x, (0x22222222 << 1)), 1)
6786 // where the shift amount and mask may vary thus:
6787 //   [1]  = 0x22222222 / 0x44444444
6788 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
6789 //   [4]  = 0x00F000F0 / 0x0F000F00
6790 //   [8]  = 0x0000FF00 / 0x00FF0000
6791 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
6792 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
6793   // These are the unshifted masks which we use to match bit-manipulation
6794   // patterns. They may be shifted left in certain circumstances.
6795   static const uint64_t BitmanipMasks[] = {
6796       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
6797       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
6798 
6799   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6800 }
6801 
6802 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
6803 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
6804                                const RISCVSubtarget &Subtarget) {
6805   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
6806   EVT VT = Op.getValueType();
6807 
6808   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
6809     return SDValue();
6810 
6811   SDValue Op0 = Op.getOperand(0);
6812   SDValue Op1 = Op.getOperand(1);
6813 
6814   // Or is commutable so canonicalize the second OR to the LHS.
6815   if (Op0.getOpcode() != ISD::OR)
6816     std::swap(Op0, Op1);
6817   if (Op0.getOpcode() != ISD::OR)
6818     return SDValue();
6819 
6820   // We found an inner OR, so our operands are the operands of the inner OR
6821   // and the other operand of the outer OR.
6822   SDValue A = Op0.getOperand(0);
6823   SDValue B = Op0.getOperand(1);
6824   SDValue C = Op1;
6825 
6826   auto Match1 = matchSHFLPat(A);
6827   auto Match2 = matchSHFLPat(B);
6828 
6829   // If neither matched, we failed.
6830   if (!Match1 && !Match2)
6831     return SDValue();
6832 
6833   // We had at least one match. if one failed, try the remaining C operand.
6834   if (!Match1) {
6835     std::swap(A, C);
6836     Match1 = matchSHFLPat(A);
6837     if (!Match1)
6838       return SDValue();
6839   } else if (!Match2) {
6840     std::swap(B, C);
6841     Match2 = matchSHFLPat(B);
6842     if (!Match2)
6843       return SDValue();
6844   }
6845   assert(Match1 && Match2);
6846 
6847   // Make sure our matches pair up.
6848   if (!Match1->formsPairWith(*Match2))
6849     return SDValue();
6850 
6851   // All the remains is to make sure C is an AND with the same input, that masks
6852   // out the bits that are being shuffled.
6853   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
6854       C.getOperand(0) != Match1->Op)
6855     return SDValue();
6856 
6857   uint64_t Mask = C.getConstantOperandVal(1);
6858 
6859   static const uint64_t BitmanipMasks[] = {
6860       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
6861       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
6862   };
6863 
6864   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6865   unsigned MaskIdx = Log2_32(Match1->ShAmt);
6866   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6867 
6868   if (Mask != ExpMask)
6869     return SDValue();
6870 
6871   SDLoc DL(Op);
6872   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
6873                      DAG.getConstant(Match1->ShAmt, DL, VT));
6874 }
6875 
6876 // Optimize (add (shl x, c0), (shl y, c1)) ->
6877 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
6878 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
6879                                   const RISCVSubtarget &Subtarget) {
6880   // Perform this optimization only in the zba extension.
6881   if (!Subtarget.hasStdExtZba())
6882     return SDValue();
6883 
6884   // Skip for vector types and larger types.
6885   EVT VT = N->getValueType(0);
6886   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
6887     return SDValue();
6888 
6889   // The two operand nodes must be SHL and have no other use.
6890   SDValue N0 = N->getOperand(0);
6891   SDValue N1 = N->getOperand(1);
6892   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
6893       !N0->hasOneUse() || !N1->hasOneUse())
6894     return SDValue();
6895 
6896   // Check c0 and c1.
6897   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
6898   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
6899   if (!N0C || !N1C)
6900     return SDValue();
6901   int64_t C0 = N0C->getSExtValue();
6902   int64_t C1 = N1C->getSExtValue();
6903   if (C0 <= 0 || C1 <= 0)
6904     return SDValue();
6905 
6906   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
6907   int64_t Bits = std::min(C0, C1);
6908   int64_t Diff = std::abs(C0 - C1);
6909   if (Diff != 1 && Diff != 2 && Diff != 3)
6910     return SDValue();
6911 
6912   // Build nodes.
6913   SDLoc DL(N);
6914   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
6915   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
6916   SDValue NA0 =
6917       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
6918   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
6919   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
6920 }
6921 
6922 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
6923 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
6924 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
6925 // not undo itself, but they are redundant.
6926 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
6927   SDValue Src = N->getOperand(0);
6928 
6929   if (Src.getOpcode() != N->getOpcode())
6930     return SDValue();
6931 
6932   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
6933       !isa<ConstantSDNode>(Src.getOperand(1)))
6934     return SDValue();
6935 
6936   unsigned ShAmt1 = N->getConstantOperandVal(1);
6937   unsigned ShAmt2 = Src.getConstantOperandVal(1);
6938   Src = Src.getOperand(0);
6939 
6940   unsigned CombinedShAmt;
6941   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
6942     CombinedShAmt = ShAmt1 | ShAmt2;
6943   else
6944     CombinedShAmt = ShAmt1 ^ ShAmt2;
6945 
6946   if (CombinedShAmt == 0)
6947     return Src;
6948 
6949   SDLoc DL(N);
6950   return DAG.getNode(
6951       N->getOpcode(), DL, N->getValueType(0), Src,
6952       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
6953 }
6954 
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
//
// N is the outer operation, Slct the select-like operand, OtherOp the other
// operand, and AllOnes selects whether the identity constant is -1 (for AND)
// or 0 (for the rest). Returns the folded select or an empty SDValue.
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  // Only fold single-use selects; otherwise the select survives and the
  // transform just adds an extra operation.
  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // Tests whether N is the identity constant for the outer operation.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before its true/false values,
  // so the value operands sit two positions further along than ISD::SELECT's.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  // Rebuild the select with the folded arms, preserving the original
  // condition operands.
  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
7013 
7014 // Attempt combineSelectAndUse on each operand of a commutative operator N.
7015 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
7016                                               bool AllOnes) {
7017   SDValue N0 = N->getOperand(0);
7018   SDValue N1 = N->getOperand(1);
7019   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
7020     return Result;
7021   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
7022     return Result;
7023   return SDValue();
7024 }
7025 
7026 // Transform (add (mul x, c0), c1) ->
7027 //           (add (mul (add x, c1/c0), c0), c1%c0).
7028 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
7029 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
7030 // to an infinite loop in DAGCombine if transformed.
7031 // Or transform (add (mul x, c0), c1) ->
7032 //              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
7033 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
7034 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
7035 // lead to an infinite loop in DAGCombine if transformed.
7036 // Or transform (add (mul x, c0), c1) ->
7037 //              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
7038 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
7039 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
7040 // lead to an infinite loop in DAGCombine if transformed.
7041 // Or transform (add (mul x, c0), c1) ->
7042 //              (mul (add x, c1/c0), c0).
7043 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
7044 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
7045                                      const RISCVSubtarget &Subtarget) {
7046   // Skip for vector types and larger types.
7047   EVT VT = N->getValueType(0);
7048   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7049     return SDValue();
7050   // The first operand node must be a MUL and has no other use.
7051   SDValue N0 = N->getOperand(0);
7052   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
7053     return SDValue();
7054   // Check if c0 and c1 match above conditions.
7055   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7056   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
7057   if (!N0C || !N1C)
7058     return SDValue();
7059   int64_t C0 = N0C->getSExtValue();
7060   int64_t C1 = N1C->getSExtValue();
7061   int64_t CA, CB;
7062   if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
7063     return SDValue();
7064   // Search for proper CA (non-zero) and CB that both are simm12.
7065   if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
7066       !isInt<12>(C0 * (C1 / C0))) {
7067     CA = C1 / C0;
7068     CB = C1 % C0;
7069   } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
7070              isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
7071     CA = C1 / C0 + 1;
7072     CB = C1 % C0 - C0;
7073   } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
7074              isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
7075     CA = C1 / C0 - 1;
7076     CB = C1 % C0 + C0;
7077   } else
7078     return SDValue();
7079   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
7080   SDLoc DL(N);
7081   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
7082                              DAG.getConstant(CA, DL, VT));
7083   SDValue New1 =
7084       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
7085   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
7086 }
7087 
7088 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
7089                                  const RISCVSubtarget &Subtarget) {
7090   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
7091     return V;
7092   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
7093     return V;
7094   // fold (add (select lhs, rhs, cc, 0, y), x) ->
7095   //      (select lhs, rhs, cc, x, (add x, y))
7096   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7097 }
7098 
7099 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
7100   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
7101   //      (select lhs, rhs, cc, x, (sub x, y))
7102   SDValue N0 = N->getOperand(0);
7103   SDValue N1 = N->getOperand(1);
7104   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
7105 }
7106 
7107 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
7108   // fold (and (select lhs, rhs, cc, -1, y), x) ->
7109   //      (select lhs, rhs, cc, x, (and x, y))
7110   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
7111 }
7112 
7113 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
7114                                 const RISCVSubtarget &Subtarget) {
7115   if (Subtarget.hasStdExtZbp()) {
7116     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
7117       return GREV;
7118     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
7119       return GORC;
7120     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
7121       return SHFL;
7122   }
7123 
7124   // fold (or (select cond, 0, y), x) ->
7125   //      (select cond, x, (or x, y))
7126   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7127 }
7128 
7129 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
7130   // fold (xor (select cond, 0, y), x) ->
7131   //      (select cond, x, (xor x, y))
7132   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7133 }
7134 
// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
// removed during type legalization leaving an ADD/SUB/MUL use that won't use
// ADDW/SUBW/MULW.
static SDValue performANY_EXTENDCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const RISCVSubtarget &Subtarget) {
  // The implicitly-sign-extending W instructions only exist on RV64.
  if (!Subtarget.is64Bit())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;

  // Only handle (any_extend i32 -> i64).
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
    return SDValue();

  // The opcode must be one that can implicitly sign_extend.
  // FIXME: Additional opcodes.
  switch (Src.getOpcode()) {
  default:
    return SDValue();
  case ISD::MUL:
    // MULW requires the M extension.
    if (!Subtarget.hasStdExtM())
      return SDValue();
    LLVM_FALLTHROUGH;
  case ISD::ADD:
  case ISD::SUB:
    break;
  }

  // Only handle cases where the result is used by a CopyToReg. That likely
  // means the value is a liveout of the basic block. This helps prevent
  // infinite combine loops like PR51206.
  if (none_of(N->uses(),
              [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
    return SDValue();

  // Collect the other users of Src. SETCC users are fine because i32 setccs
  // are legalized by sign extending their operands anyway; any other user
  // ends the scan since we don't know whether it tolerates a sign extend.
  SmallVector<SDNode *, 4> SetCCs;
  for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
                            UE = Src.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    // Skip uses of other result values of the Src node.
    if (UI.getUse().getResNo() != Src.getResNo())
      continue;
    // All i32 setccs are legalized by sign extending operands.
    if (User->getOpcode() == ISD::SETCC) {
      SetCCs.push_back(User);
      continue;
    }
    // We don't know if we can extend this user.
    break;
  }

  // If we don't have any SetCCs, this isn't worthwhile.
  if (SetCCs.empty())
    return SDValue();

  // Replace the any_extend with a sign_extend of Src.
  SDLoc DL(N);
  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
  DCI.CombineTo(N, SExt);

  // Promote all the setccs.
  for (SDNode *SetCC : SetCCs) {
    SmallVector<SDValue, 4> Ops;

    // Sign extend both compare operands; reuse SExt for the Src operand.
    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Src)
        Ops.push_back(SExt);
      else
        Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
    }

    // Keep the original condition code.
    Ops.push_back(SetCC->getOperand(2));
    DCI.CombineTo(SetCC,
                  DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
  // Return N so it doesn't get rechecked; CombineTo did the replacement.
  return SDValue(N, 0);
}
7218 
// Try to form VWMUL or VWMULU.
// Folds (mul_vl (vsext/vzext x), (vsext/vzext y)) — or a splat whose scalar
// has enough sign/zero bits — into a widening multiply on the narrow type.
// FIXME: Support VWMULSU.
static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
                                    SelectionDAG &DAG) {
  assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
  // Op0 must be a single-use extend; its kind picks VWMUL vs VWMULU below.
  bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
  bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
  if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
    return SDValue();

  SDValue Mask = N->getOperand(2);
  SDValue VL = N->getOperand(3);

  // Make sure the mask and VL match.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
    return SDValue();

  MVT VT = N->getSimpleValueType(0);

  // Determine the narrow size for a widening multiply.
  unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
  MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
                                  VT.getVectorElementCount());

  SDLoc DL(N);

  // See if the other operand is the same opcode.
  if (Op0.getOpcode() == Op1.getOpcode()) {
    if (!Op1.hasOneUse())
      return SDValue();

    // Make sure the mask and VL match.
    if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
      return SDValue();

    // Peel the extend; the narrow source becomes the multiply operand.
    Op1 = Op1.getOperand(0);
  } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
    // The operand is a splat of a scalar.

    // The VL must be the same.
    if (Op1.getOperand(1) != VL)
      return SDValue();

    // Get the scalar value.
    Op1 = Op1.getOperand(0);

    // See if have enough sign bits or zero bits in the scalar to use a
    // widening multiply by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op1.getValueSizeInBits();
    // Make sure we're getting all element bits from the scalar register.
    // FIXME: Support implicit sign extension of vmv.v.x?
    if (ScalarBits < EltBits)
      return SDValue();

    if (IsSignExt) {
      if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
        return SDValue();
    } else {
      // NOTE: this APInt intentionally shadows the SDValue Mask above; it is
      // the high bits that must be known zero for an unsigned widening.
      APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
      if (!DAG.MaskedValueIsZero(Op1, Mask))
        return SDValue();
    }

    // Re-splat the scalar at the narrow element type.
    Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
  } else
    return SDValue();

  // Peel Op0's extend as well.
  Op0 = Op0.getOperand(0);

  // Re-introduce narrower extends if needed.
  unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
  if (Op0.getValueType() != NarrowVT)
    Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
  if (Op1.getValueType() != NarrowVT)
    Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);

  unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
  return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
7299 
7300 static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
7301   switch (Op.getOpcode()) {
7302   case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
7303   case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
7304   case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
7305   case ISD::FCEIL:      return RISCVFPRndMode::RUP;
7306   case ISD::FROUND:     return RISCVFPRndMode::RMM;
7307   }
7308 
7309   return RISCVFPRndMode::Invalid;
7310 }
7311 
// Fold
//   (fp_to_int (froundeven X)) -> fcvt X, rne
//   (fp_to_int (ftrunc X))     -> fcvt X, rtz
//   (fp_to_int (ffloor X))     -> fcvt X, rdn
//   (fp_to_int (fceil X))      -> fcvt X, rup
//   (fp_to_int (fround X))     -> fcvt X, rmm
static SDValue performFP_TO_INTCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  // The source must be one of the recognized rounding ops; its kind fixes
  // the static rounding mode passed to FCVT.
  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // Pick the full-width FCVT for XLen results, or the W variants for i32
  // results on RV64 (the only other case that passed the VT check above).
  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;

  // Convert the rounding op's input directly; the FCVT performs the rounding.
  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
7358 
7359 // Fold
7360 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
7361 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
7362 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
7363 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
7364 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
7365 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
7366                                        TargetLowering::DAGCombinerInfo &DCI,
7367                                        const RISCVSubtarget &Subtarget) {
7368   SelectionDAG &DAG = DCI.DAG;
7369   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7370   MVT XLenVT = Subtarget.getXLenVT();
7371 
7372   // Only handle XLen types. Other types narrower than XLen will eventually be
7373   // legalized to XLenVT.
7374   EVT DstVT = N->getValueType(0);
7375   if (DstVT != XLenVT)
7376     return SDValue();
7377 
7378   SDValue Src = N->getOperand(0);
7379 
7380   // Ensure the FP type is also legal.
7381   if (!TLI.isTypeLegal(Src.getValueType()))
7382     return SDValue();
7383 
7384   // Don't do this for f16 with Zfhmin and not Zfh.
7385   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
7386     return SDValue();
7387 
7388   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7389 
7390   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
7391   if (FRM == RISCVFPRndMode::Invalid)
7392     return SDValue();
7393 
7394   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
7395 
7396   unsigned Opc;
7397   if (SatVT == DstVT)
7398     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
7399   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
7400     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7401   else
7402     return SDValue();
7403   // FIXME: Support other SatVTs by clamping before or after the conversion.
7404 
7405   Src = Src.getOperand(0);
7406 
7407   SDLoc DL(N);
7408   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
7409                                 DAG.getTargetConstant(FRM, DL, XLenVT));
7410 
7411   // RISCV FP-to-int conversions saturate to the destination register size, but
7412   // don't produce 0 for nan.
7413   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
7414   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
7415 }
7416 
7417 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
7418                                                DAGCombinerInfo &DCI) const {
7419   SelectionDAG &DAG = DCI.DAG;
7420 
7421   // Helper to call SimplifyDemandedBits on an operand of N where only some low
7422   // bits are demanded. N will be added to the Worklist if it was not deleted.
7423   // Caller should return SDValue(N, 0) if this returns true.
7424   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
7425     SDValue Op = N->getOperand(OpNo);
7426     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
7427     if (!SimplifyDemandedBits(Op, Mask, DCI))
7428       return false;
7429 
7430     if (N->getOpcode() != ISD::DELETED_NODE)
7431       DCI.AddToWorklist(N);
7432     return true;
7433   };
7434 
7435   switch (N->getOpcode()) {
7436   default:
7437     break;
7438   case RISCVISD::SplitF64: {
7439     SDValue Op0 = N->getOperand(0);
7440     // If the input to SplitF64 is just BuildPairF64 then the operation is
7441     // redundant. Instead, use BuildPairF64's operands directly.
7442     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
7443       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7444 
7445     SDLoc DL(N);
7446 
7447     // It's cheaper to materialise two 32-bit integers than to load a double
7448     // from the constant pool and transfer it to integer registers through the
7449     // stack.
7450     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
7451       APInt V = C->getValueAPF().bitcastToAPInt();
7452       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7453       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7454       return DCI.CombineTo(N, Lo, Hi);
7455     }
7456 
7457     // This is a target-specific version of a DAGCombine performed in
7458     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7459     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7460     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7461     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7462         !Op0.getNode()->hasOneUse())
7463       break;
7464     SDValue NewSplitF64 =
7465         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
7466                     Op0.getOperand(0));
7467     SDValue Lo = NewSplitF64.getValue(0);
7468     SDValue Hi = NewSplitF64.getValue(1);
7469     APInt SignBit = APInt::getSignMask(32);
7470     if (Op0.getOpcode() == ISD::FNEG) {
7471       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
7472                                   DAG.getConstant(SignBit, DL, MVT::i32));
7473       return DCI.CombineTo(N, Lo, NewHi);
7474     }
7475     assert(Op0.getOpcode() == ISD::FABS);
7476     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
7477                                 DAG.getConstant(~SignBit, DL, MVT::i32));
7478     return DCI.CombineTo(N, Lo, NewHi);
7479   }
7480   case RISCVISD::SLLW:
7481   case RISCVISD::SRAW:
7482   case RISCVISD::SRLW:
7483   case RISCVISD::ROLW:
7484   case RISCVISD::RORW: {
7485     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7486     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7487         SimplifyDemandedLowBitsHelper(1, 5))
7488       return SDValue(N, 0);
7489     break;
7490   }
7491   case RISCVISD::CLZW:
7492   case RISCVISD::CTZW: {
7493     // Only the lower 32 bits of the first operand are read
7494     if (SimplifyDemandedLowBitsHelper(0, 32))
7495       return SDValue(N, 0);
7496     break;
7497   }
7498   case RISCVISD::GREV:
7499   case RISCVISD::GORC: {
    // Only the lower log2(Bitwidth) bits of the shift amount are read.
7501     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7502     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7503     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
7504       return SDValue(N, 0);
7505 
7506     return combineGREVI_GORCI(N, DAG);
7507   }
7508   case RISCVISD::GREVW:
7509   case RISCVISD::GORCW: {
7510     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
7511     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7512         SimplifyDemandedLowBitsHelper(1, 5))
7513       return SDValue(N, 0);
7514 
7515     return combineGREVI_GORCI(N, DAG);
7516   }
7517   case RISCVISD::SHFL:
7518   case RISCVISD::UNSHFL: {
    // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
7520     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
7521     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
7522     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
7523       return SDValue(N, 0);
7524 
7525     break;
7526   }
7527   case RISCVISD::SHFLW:
7528   case RISCVISD::UNSHFLW: {
7529     // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
7530     SDValue LHS = N->getOperand(0);
7531     SDValue RHS = N->getOperand(1);
7532     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
7533     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
7534     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7535         SimplifyDemandedLowBitsHelper(1, 4))
7536       return SDValue(N, 0);
7537 
7538     break;
7539   }
7540   case RISCVISD::BCOMPRESSW:
7541   case RISCVISD::BDECOMPRESSW: {
7542     // Only the lower 32 bits of LHS and RHS are read.
7543     if (SimplifyDemandedLowBitsHelper(0, 32) ||
7544         SimplifyDemandedLowBitsHelper(1, 32))
7545       return SDValue(N, 0);
7546 
7547     break;
7548   }
7549   case RISCVISD::FMV_X_ANYEXTH:
7550   case RISCVISD::FMV_X_ANYEXTW_RV64: {
7551     SDLoc DL(N);
7552     SDValue Op0 = N->getOperand(0);
7553     MVT VT = N->getSimpleValueType(0);
7554     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
7555     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
7556     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
7557     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
7558          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
7559         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
7560          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
7561       assert(Op0.getOperand(0).getValueType() == VT &&
7562              "Unexpected value type!");
7563       return Op0.getOperand(0);
7564     }
7565 
7566     // This is a target-specific version of a DAGCombine performed in
7567     // DAGCombiner::visitBITCAST. It performs the equivalent of:
7568     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7569     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7570     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
7571         !Op0.getNode()->hasOneUse())
7572       break;
7573     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
7574     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
7575     APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
7576     if (Op0.getOpcode() == ISD::FNEG)
7577       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
7578                          DAG.getConstant(SignBit, DL, VT));
7579 
7580     assert(Op0.getOpcode() == ISD::FABS);
7581     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
7582                        DAG.getConstant(~SignBit, DL, VT));
7583   }
7584   case ISD::ADD:
7585     return performADDCombine(N, DAG, Subtarget);
7586   case ISD::SUB:
7587     return performSUBCombine(N, DAG);
7588   case ISD::AND:
7589     return performANDCombine(N, DAG);
7590   case ISD::OR:
7591     return performORCombine(N, DAG, Subtarget);
7592   case ISD::XOR:
7593     return performXORCombine(N, DAG);
7594   case ISD::ANY_EXTEND:
7595     return performANY_EXTENDCombine(N, DCI, Subtarget);
7596   case ISD::ZERO_EXTEND:
7597     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
7598     // type legalization. This is safe because fp_to_uint produces poison if
7599     // it overflows.
7600     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
7601       SDValue Src = N->getOperand(0);
7602       if (Src.getOpcode() == ISD::FP_TO_UINT &&
7603           isTypeLegal(Src.getOperand(0).getValueType()))
7604         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
7605                            Src.getOperand(0));
7606       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
7607           isTypeLegal(Src.getOperand(1).getValueType())) {
7608         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
7609         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
7610                                   Src.getOperand(0), Src.getOperand(1));
7611         DCI.CombineTo(N, Res);
7612         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
7613         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
7614         return SDValue(N, 0); // Return N so it doesn't get rechecked.
7615       }
7616     }
7617     return SDValue();
7618   case RISCVISD::SELECT_CC: {
7619     // Transform
7620     SDValue LHS = N->getOperand(0);
7621     SDValue RHS = N->getOperand(1);
7622     SDValue TrueV = N->getOperand(3);
7623     SDValue FalseV = N->getOperand(4);
7624 
7625     // If the True and False values are the same, we don't need a select_cc.
7626     if (TrueV == FalseV)
7627       return TrueV;
7628 
7629     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
7630     if (!ISD::isIntEqualitySetCC(CCVal))
7631       break;
7632 
7633     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
7634     //      (select_cc X, Y, lt, trueV, falseV)
7635     // Sometimes the setcc is introduced after select_cc has been formed.
7636     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7637         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
7638       // If we're looking for eq 0 instead of ne 0, we need to invert the
7639       // condition.
7640       bool Invert = CCVal == ISD::SETEQ;
7641       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7642       if (Invert)
7643         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7644 
7645       SDLoc DL(N);
7646       RHS = LHS.getOperand(1);
7647       LHS = LHS.getOperand(0);
7648       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7649 
7650       SDValue TargetCC = DAG.getCondCode(CCVal);
7651       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
7652                          {LHS, RHS, TargetCC, TrueV, FalseV});
7653     }
7654 
7655     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
7656     //      (select_cc X, Y, eq/ne, trueV, falseV)
7657     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
7658       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
7659                          {LHS.getOperand(0), LHS.getOperand(1),
7660                           N->getOperand(2), TrueV, FalseV});
7661     // (select_cc X, 1, setne, trueV, falseV) ->
7662     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
7663     // This can occur when legalizing some floating point comparisons.
7664     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7665     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7666       SDLoc DL(N);
7667       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7668       SDValue TargetCC = DAG.getCondCode(CCVal);
7669       RHS = DAG.getConstant(0, DL, LHS.getValueType());
7670       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
7671                          {LHS, RHS, TargetCC, TrueV, FalseV});
7672     }
7673 
7674     break;
7675   }
7676   case RISCVISD::BR_CC: {
7677     SDValue LHS = N->getOperand(1);
7678     SDValue RHS = N->getOperand(2);
7679     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
7680     if (!ISD::isIntEqualitySetCC(CCVal))
7681       break;
7682 
7683     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
7684     //      (br_cc X, Y, lt, dest)
7685     // Sometimes the setcc is introduced after br_cc has been formed.
7686     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7687         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
7688       // If we're looking for eq 0 instead of ne 0, we need to invert the
7689       // condition.
7690       bool Invert = CCVal == ISD::SETEQ;
7691       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7692       if (Invert)
7693         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7694 
7695       SDLoc DL(N);
7696       RHS = LHS.getOperand(1);
7697       LHS = LHS.getOperand(0);
7698       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7699 
7700       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
7701                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
7702                          N->getOperand(4));
7703     }
7704 
7705     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
7706     //      (br_cc X, Y, eq/ne, trueV, falseV)
7707     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
7708       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
7709                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
7710                          N->getOperand(3), N->getOperand(4));
7711 
7712     // (br_cc X, 1, setne, br_cc) ->
7713     // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
7714     // This can occur when legalizing some floating point comparisons.
7715     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7716     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7717       SDLoc DL(N);
7718       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7719       SDValue TargetCC = DAG.getCondCode(CCVal);
7720       RHS = DAG.getConstant(0, DL, LHS.getValueType());
7721       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
7722                          N->getOperand(0), LHS, RHS, TargetCC,
7723                          N->getOperand(4));
7724     }
7725     break;
7726   }
7727   case ISD::FP_TO_SINT:
7728   case ISD::FP_TO_UINT:
7729     return performFP_TO_INTCombine(N, DCI, Subtarget);
7730   case ISD::FP_TO_SINT_SAT:
7731   case ISD::FP_TO_UINT_SAT:
7732     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
7733   case ISD::FCOPYSIGN: {
7734     EVT VT = N->getValueType(0);
7735     if (!VT.isVector())
7736       break;
7737     // There is a form of VFSGNJ which injects the negated sign of its second
7738     // operand. Try and bubble any FNEG up after the extend/round to produce
7739     // this optimized pattern. Avoid modifying cases where FP_ROUND and
7740     // TRUNC=1.
7741     SDValue In2 = N->getOperand(1);
7742     // Avoid cases where the extend/round has multiple uses, as duplicating
7743     // those is typically more expensive than removing a fneg.
7744     if (!In2.hasOneUse())
7745       break;
7746     if (In2.getOpcode() != ISD::FP_EXTEND &&
7747         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
7748       break;
7749     In2 = In2.getOperand(0);
7750     if (In2.getOpcode() != ISD::FNEG)
7751       break;
7752     SDLoc DL(N);
7753     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
7754     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
7755                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
7756   }
7757   case ISD::MGATHER:
7758   case ISD::MSCATTER:
7759   case ISD::VP_GATHER:
7760   case ISD::VP_SCATTER: {
7761     if (!DCI.isBeforeLegalize())
7762       break;
7763     SDValue Index, ScaleOp;
7764     bool IsIndexScaled = false;
7765     bool IsIndexSigned = false;
7766     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
7767       Index = VPGSN->getIndex();
7768       ScaleOp = VPGSN->getScale();
7769       IsIndexScaled = VPGSN->isIndexScaled();
7770       IsIndexSigned = VPGSN->isIndexSigned();
7771     } else {
7772       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
7773       Index = MGSN->getIndex();
7774       ScaleOp = MGSN->getScale();
7775       IsIndexScaled = MGSN->isIndexScaled();
7776       IsIndexSigned = MGSN->isIndexSigned();
7777     }
7778     EVT IndexVT = Index.getValueType();
7779     MVT XLenVT = Subtarget.getXLenVT();
7780     // RISCV indexed loads only support the "unsigned unscaled" addressing
7781     // mode, so anything else must be manually legalized.
7782     bool NeedsIdxLegalization =
7783         IsIndexScaled ||
7784         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
7785     if (!NeedsIdxLegalization)
7786       break;
7787 
7788     SDLoc DL(N);
7789 
7790     // Any index legalization should first promote to XLenVT, so we don't lose
7791     // bits when scaling. This may create an illegal index type so we let
7792     // LLVM's legalization take care of the splitting.
7793     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
7794     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
7795       IndexVT = IndexVT.changeVectorElementType(XLenVT);
7796       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
7797                           DL, IndexVT, Index);
7798     }
7799 
7800     unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
7801     if (IsIndexScaled && Scale != 1) {
7802       // Manually scale the indices by the element size.
7803       // TODO: Sanitize the scale operand here?
7804       // TODO: For VP nodes, should we use VP_SHL here?
7805       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
7806       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
7807       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
7808     }
7809 
7810     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
7811     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
7812       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
7813                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
7814                               VPGN->getScale(), VPGN->getMask(),
7815                               VPGN->getVectorLength()},
7816                              VPGN->getMemOperand(), NewIndexTy);
7817     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
7818       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
7819                               {VPSN->getChain(), VPSN->getValue(),
7820                                VPSN->getBasePtr(), Index, VPSN->getScale(),
7821                                VPSN->getMask(), VPSN->getVectorLength()},
7822                               VPSN->getMemOperand(), NewIndexTy);
7823     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
7824       return DAG.getMaskedGather(
7825           N->getVTList(), MGN->getMemoryVT(), DL,
7826           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
7827            MGN->getBasePtr(), Index, MGN->getScale()},
7828           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
7829     const auto *MSN = cast<MaskedScatterSDNode>(N);
7830     return DAG.getMaskedScatter(
7831         N->getVTList(), MSN->getMemoryVT(), DL,
7832         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
7833          Index, MSN->getScale()},
7834         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
7835   }
7836   case RISCVISD::SRA_VL:
7837   case RISCVISD::SRL_VL:
7838   case RISCVISD::SHL_VL: {
7839     SDValue ShAmt = N->getOperand(1);
7840     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7841       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7842       SDLoc DL(N);
7843       SDValue VL = N->getOperand(3);
7844       EVT VT = N->getValueType(0);
7845       ShAmt =
7846           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
7847       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
7848                          N->getOperand(2), N->getOperand(3));
7849     }
7850     break;
7851   }
7852   case ISD::SRA:
7853   case ISD::SRL:
7854   case ISD::SHL: {
7855     SDValue ShAmt = N->getOperand(1);
7856     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7857       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7858       SDLoc DL(N);
7859       EVT VT = N->getValueType(0);
7860       ShAmt =
7861           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
7862       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
7863     }
7864     break;
7865   }
7866   case RISCVISD::MUL_VL: {
7867     SDValue Op0 = N->getOperand(0);
7868     SDValue Op1 = N->getOperand(1);
7869     if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
7870       return V;
7871     if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
7872       return V;
7873     return SDValue();
7874   }
7875   case ISD::STORE: {
7876     auto *Store = cast<StoreSDNode>(N);
7877     SDValue Val = Store->getValue();
7878     // Combine store of vmv.x.s to vse with VL of 1.
7879     // FIXME: Support FP.
7880     if (Val.getOpcode() == RISCVISD::VMV_X_S) {
7881       SDValue Src = Val.getOperand(0);
7882       EVT VecVT = Src.getValueType();
7883       EVT MemVT = Store->getMemoryVT();
7884       // The memory VT and the element type must match.
7885       if (VecVT.getVectorElementType() == MemVT) {
7886         SDLoc DL(N);
7887         MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
7888         return DAG.getStoreVP(
7889             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
7890             DAG.getConstant(1, DL, MaskVT),
7891             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
7892             Store->getMemOperand(), Store->getAddressingMode(),
7893             Store->isTruncatingStore(), /*IsCompress*/ false);
7894       }
7895     }
7896 
7897     break;
7898   }
7899   }
7900 
7901   return SDValue();
7902 }
7903 
7904 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
7905     const SDNode *N, CombineLevel Level) const {
7906   // The following folds are only desirable if `(OP _, c1 << c2)` can be
7907   // materialised in fewer instructions than `(OP _, c1)`:
7908   //
7909   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7910   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7911   SDValue N0 = N->getOperand(0);
7912   EVT Ty = N0.getValueType();
7913   if (Ty.isScalarInteger() &&
7914       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
7915     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7916     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
7917     if (C1 && C2) {
7918       const APInt &C1Int = C1->getAPIntValue();
7919       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
7920 
7921       // We can materialise `c1 << c2` into an add immediate, so it's "free",
7922       // and the combine should happen, to potentially allow further combines
7923       // later.
7924       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
7925           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
7926         return true;
7927 
7928       // We can materialise `c1` in an add immediate, so it's "free", and the
7929       // combine should be prevented.
7930       if (C1Int.getMinSignedBits() <= 64 &&
7931           isLegalAddImmediate(C1Int.getSExtValue()))
7932         return false;
7933 
7934       // Neither constant will fit into an immediate, so find materialisation
7935       // costs.
7936       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
7937                                               Subtarget.getFeatureBits(),
7938                                               /*CompressionCost*/true);
7939       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
7940           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
7941           /*CompressionCost*/true);
7942 
7943       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
7944       // combine should be prevented.
7945       if (C1Cost < ShiftedC1Cost)
7946         return false;
7947     }
7948   }
7949   return true;
7950 }
7951 
// Try to replace the constant operand of an AND with a different constant
// that is cheaper to materialise, given that only DemandedBits of the result
// are observed. Returns true if Op was rewritten via TLO.CombineTo (or needs
// no change); returns false to defer to the target-independent shrinking.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.

  APInt ExpandedMask = Mask | ~DemandedBits;

  // A candidate mask is legal iff it keeps every demanded one (superset of
  // ShrunkMask) and no demanded zero (subset of ExpandedMask). Note the
  // captures are by value, so later mutation of ShrunkMask (below) does not
  // affect this check.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  // Rewrite the AND with NewMask; if the mask is already NewMask, report
  // success without creating a new node.
  auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Preserve (and X, 0xffff) when zext.h is supported.
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
  if (VT == MVT::i64) {
    APInt NewMask = APInt(64, 0xffffffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
8033 
8034 static void computeGREV(APInt &Src, unsigned ShAmt) {
8035   ShAmt &= Src.getBitWidth() - 1;
8036   uint64_t x = Src.getZExtValue();
8037   if (ShAmt & 1)
8038     x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
8039   if (ShAmt & 2)
8040     x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
8041   if (ShAmt & 4)
8042     x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
8043   if (ShAmt & 8)
8044     x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
8045   if (ShAmt & 16)
8046     x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
8047   if (ShAmt & 32)
8048     x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
8049   Src = x;
8050 }
8051 
// Compute known-zero/known-one bits for RISCV-specific DAG nodes so generic
// combines (e.g. MaskedValueIsZero) can reason about them. Cases that prove
// nothing leave Known fully reset, which is always conservatively correct.
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  // Start with nothing known; each case fills in what it can prove.
  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    // Result is one of operands 3/4 (true/false values), so a bit is known
    // only if it is known, and agrees, in both.
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    // The result counts trailing zeros of the low 32 bits, so it is bounded
    // by the largest possible count; bits above that bound must be zero.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = Log2_32(PossibleTZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    // Same bounding argument as CTZW, but for leading zeros.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = Log2_32(PossibleLZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GREVW: {
    // With a constant control, GREV is a fixed bit permutation: permute the
    // known-zero and known-one sets the same way the data is permuted.
    if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
      if (Opc == RISCVISD::GREVW)
        Known = Known.trunc(32);
      unsigned ShAmt = C->getZExtValue();
      computeGREV(Known.Zero, ShAmt);
      computeGREV(Known.One, ShAmt);
      if (Opc == RISCVISD::GREVW)
        Known = Known.sext(BitWidth);
    }
    break;
  }
  case RISCVISD::READ_VLENB:
    // We assume VLENB is at least 16 bytes.
    Known.Zero.setLowBits(4);
    // We assume VLENB is no more than 65536 / 8 bytes.
    Known.Zero.setBitsFrom(14);
    break;
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)
        Known.Zero.setBitsFrom(31);
      break;
    }
    break;
  }
  }
}
8152 
// Report a lower bound on the number of sign bits for RISCV-specific DAG
// nodes. Returning 1 (the fallthrough) is always conservatively correct.
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    // Result is either the true (operand 3) or false (operand 4) value, so
    // take the minimum of the two.
    unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1;  // Early out.
    unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  // All of these *W nodes produce a 32-bit result sign-extended to XLEN=64,
  // so at least the top 33 bits are copies of the sign bit.
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVW:
  case RISCVISD::GORCW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW:
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW:
  case RISCVISD::BFPW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        isa<ConstantSDNode>(Op.getOperand(1)) &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S:
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
      return 1;
    return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
  }

  return 1;
}
8218 
// Expand the ReadCycleWide pseudo into a retry loop that reads the 64-bit
// cycle counter as two 32-bit CSR reads, re-reading until the counter's high
// word did not change between the two CYCLEH reads. Returns the block where
// subsequent instructions should be emitted (DoneMBB).
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the three CSR reads plus the retry branch; DoneMBB gets
  // everything that followed the pseudo.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1 = x0 leaves the CSR unmodified, i.e. a plain CSR read.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the two CYCLEH reads disagree (low word wrapped in between).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
8280 
// Expand SplitF64Pseudo: move the two 32-bit halves of an FPR64 source into
// a pair of GPRs by spilling the FPR64 to a shared stack slot and reloading
// each half with an LW (low word at offset 0, high at offset 4 — RISC-V is
// little-endian).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // Frame index of the slot reserved for these f64 <-> GPR-pair moves.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  // Spill the 8-byte FP value, then reload it as two 4-byte integer loads.
  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
8313 
// Expand BuildPairF64Pseudo (the inverse of SplitF64Pseudo): combine two
// 32-bit GPR halves into one FPR64 by storing them to adjacent words of a
// shared stack slot and reloading the slot as an FP double.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // Frame index of the slot reserved for these f64 <-> GPR-pair moves.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  // Store low word at offset 0 and high word at offset 4 (little-endian),
  // then reload the whole 8 bytes into the FP register.
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
8348 
8349 static bool isSelectPseudo(MachineInstr &MI) {
8350   switch (MI.getOpcode()) {
8351   default:
8352     return false;
8353   case RISCV::Select_GPR_Using_CC_GPR:
8354   case RISCV::Select_FPR16_Using_CC_GPR:
8355   case RISCV::Select_FPR32_Using_CC_GPR:
8356   case RISCV::Select_FPR64_Using_CC_GPR:
8357     return true;
8358   }
8359 }
8360 
// Expand a quiet floating-point compare pseudo. The relational compare
// (RelOpcode) is bracketed by an FFLAGS save/restore so any flags it sets
// are discarded, then a dummy FEQ (EqOpcode) writing x0 is issued so that
// signaling NaNs still raise the invalid-operation exception.
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);
  // Propagate the no-FP-exception flag from the pseudo onto the expansion.
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
8396 
// Expand one or more consecutive Select_*_Using_CC_GPR pseudos into a
// branch-over-a-block (triangle) CFG pattern plus PHIs. Returns the tail
// block in which emission should continue.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI to find the longest sequence of selects sharing the
  // same condition (interleaved with instructions proven safe, per above).
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      // Stop at a select with a different condition, or one whose TrueV or
      // FalseV (operands 4/5) is the result of an earlier select in the run.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // Non-select instruction: stop at anything with side effects, memory
      // access, or a use of a select result produced in this run.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch. Branching to TailMBB means the condition
  // held, so TrueV reaches the PHI along the HeadMBB edge.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // New PHIs were introduced after PHI elimination normally forbids them.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
8518 
8519 MachineBasicBlock *
8520 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
8521                                                  MachineBasicBlock *BB) const {
8522   switch (MI.getOpcode()) {
8523   default:
8524     llvm_unreachable("Unexpected instr type to insert");
8525   case RISCV::ReadCycleWide:
8526     assert(!Subtarget.is64Bit() &&
8527            "ReadCycleWrite is only to be used on riscv32");
8528     return emitReadCycleWidePseudo(MI, BB);
8529   case RISCV::Select_GPR_Using_CC_GPR:
8530   case RISCV::Select_FPR16_Using_CC_GPR:
8531   case RISCV::Select_FPR32_Using_CC_GPR:
8532   case RISCV::Select_FPR64_Using_CC_GPR:
8533     return emitSelectPseudo(MI, BB, Subtarget);
8534   case RISCV::BuildPairF64Pseudo:
8535     return emitBuildPairF64Pseudo(MI, BB);
8536   case RISCV::SplitF64Pseudo:
8537     return emitSplitF64Pseudo(MI, BB);
8538   case RISCV::PseudoQuietFLE_H:
8539     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
8540   case RISCV::PseudoQuietFLT_H:
8541     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
8542   case RISCV::PseudoQuietFLE_S:
8543     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
8544   case RISCV::PseudoQuietFLT_S:
8545     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
8546   case RISCV::PseudoQuietFLE_D:
8547     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
8548   case RISCV::PseudoQuietFLT_D:
8549     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
8550   }
8551 }
8552 
8553 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
8554                                                         SDNode *Node) const {
8555   // Add FRM dependency to any instructions with dynamic rounding mode.
8556   unsigned Opc = MI.getOpcode();
8557   auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
8558   if (Idx < 0)
8559     return;
8560   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
8561     return;
8562   // If the instruction already reads FRM, don't add another read.
8563   if (MI.readsRegister(RISCV::FRM))
8564     return;
8565   MI.addOperand(
8566       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
8567 }
8568 
8569 // Calling Convention Implementation.
8570 // The expectations for frontend ABI lowering vary from target to target.
8571 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
8572 // details, but this is a longer term goal. For now, we simply try to keep the
8573 // role of the frontend as simple and well-defined as possible. The rules can
8574 // be summarised as:
8575 // * Never split up large scalar arguments. We handle them here.
8576 // * If a hardfloat calling convention is being used, and the struct may be
8577 // passed in a pair of registers (fp+fp, int+fp), and both registers are
8578 // available, then pass as two separate arguments. If either the GPRs or FPRs
8579 // are exhausted, then pass according to the rule below.
8580 // * If a struct could never be passed in registers or directly in a stack
8581 // slot (as it is larger than 2*XLEN and the floating point rules don't
8582 // apply), then pass it using a pointer with the byval attribute.
8583 // * If a struct is less than 2*XLEN, then coerce to either a two-element
8584 // word-sized array or a 2*XLEN scalar (depending on alignment).
8585 // * The frontend can determine whether a struct is returned by reference or
8586 // not based on its size and fields. If it will be returned by reference, the
8587 // frontend must modify the prototype so a pointer with the sret annotation is
8588 // passed as the first argument. This is not necessary for large scalar
8589 // returns.
8590 // * Struct return values and varargs should be coerced to structs containing
8591 // register-size fields in the same situations they would be for fixed
8592 // arguments.
8593 
// GPRs used for argument passing: a0-a7 (x10-x17).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// FPR argument registers (fa0-fa7) viewed at f16 width.
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
// FPR argument registers (fa0-fa7) viewed at f32 width.
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
// FPR argument registers (fa0-fa7) viewed at f64 width.
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
// Vector register groups (LMUL=2/4/8) covering the same V8-V23 range.
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
8621 
8622 // Pass a 2*XLEN argument that has been split into two XLEN values through
8623 // registers or the stack as necessary.
8624 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
8625                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
8626                                 MVT ValVT2, MVT LocVT2,
8627                                 ISD::ArgFlagsTy ArgFlags2) {
8628   unsigned XLenInBytes = XLen / 8;
8629   if (Register Reg = State.AllocateReg(ArgGPRs)) {
8630     // At least one half can be passed via register.
8631     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8632                                      VA1.getLocVT(), CCValAssign::Full));
8633   } else {
8634     // Both halves must be passed on the stack, with proper alignment.
8635     Align StackAlign =
8636         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8637     State.addLoc(
8638         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
8639                             State.AllocateStack(XLenInBytes, StackAlign),
8640                             VA1.getLocVT(), CCValAssign::Full));
8641     State.addLoc(CCValAssign::getMem(
8642         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
8643         LocVT2, CCValAssign::Full));
8644     return false;
8645   }
8646 
8647   if (Register Reg = State.AllocateReg(ArgGPRs)) {
8648     // The second half can also be passed via register.
8649     State.addLoc(
8650         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8651   } else {
8652     // The second half is passed via the stack, without additional alignment.
8653     State.addLoc(CCValAssign::getMem(
8654         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
8655         LocVT2, CCValAssign::Full));
8656   }
8657 
8658   return false;
8659 }
8660 
8661 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
8662                                Optional<unsigned> FirstMaskArgument,
8663                                CCState &State, const RISCVTargetLowering &TLI) {
8664   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
8665   if (RC == &RISCV::VRRegClass) {
8666     // Assign the first mask argument to V0.
8667     // This is an interim calling convention and it may be changed in the
8668     // future.
8669     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
8670       return State.AllocateReg(RISCV::V0);
8671     return State.AllocateReg(ArgVRs);
8672   }
8673   if (RC == &RISCV::VRM2RegClass)
8674     return State.AllocateReg(ArgVRM2s);
8675   if (RC == &RISCV::VRM4RegClass)
8676     return State.AllocateReg(ArgVRM4s);
8677   if (RC == &RISCV::VRM8RegClass)
8678     return State.AllocateReg(ArgVRM8s);
8679   llvm_unreachable("Unhandled register class for ValueType");
8680 }
8681 
// Implements the RISC-V calling convention. Returns true upon failure.
//
// Assigns a location (register or stack slot) to a single value and records
// it in State. IsFixed is false for variadic arguments, IsRet is true for
// return values, OrigTy is the original IR type when known (may be null),
// and FirstMaskArgument, if set, is the index of the RVV mask argument that
// is pre-assigned to V0.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  // Checking the FPR32 pool therefore tells us whether any FPRs remain at all.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  // If no register was available, reserve the stack slot up front; an
  // indirectly-passed split argument shares this single location.
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
8898 
8899 template <typename ArgTy>
8900 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
8901   for (const auto &ArgIdx : enumerate(Args)) {
8902     MVT ArgVT = ArgIdx.value().VT;
8903     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
8904       return ArgIdx.index();
8905   }
8906   return None;
8907 }
8908 
8909 void RISCVTargetLowering::analyzeInputArgs(
8910     MachineFunction &MF, CCState &CCInfo,
8911     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8912     RISCVCCAssignFn Fn) const {
8913   unsigned NumArgs = Ins.size();
8914   FunctionType *FType = MF.getFunction().getFunctionType();
8915 
8916   Optional<unsigned> FirstMaskArgument;
8917   if (Subtarget.hasVInstructions())
8918     FirstMaskArgument = preAssignMask(Ins);
8919 
8920   for (unsigned i = 0; i != NumArgs; ++i) {
8921     MVT ArgVT = Ins[i].VT;
8922     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
8923 
8924     Type *ArgTy = nullptr;
8925     if (IsRet)
8926       ArgTy = FType->getReturnType();
8927     else if (Ins[i].isOrigArg())
8928       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8929 
8930     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8931     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8932            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
8933            FirstMaskArgument)) {
8934       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
8935                         << EVT(ArgVT).getEVTString() << '\n');
8936       llvm_unreachable(nullptr);
8937     }
8938   }
8939 }
8940 
8941 void RISCVTargetLowering::analyzeOutputArgs(
8942     MachineFunction &MF, CCState &CCInfo,
8943     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8944     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
8945   unsigned NumArgs = Outs.size();
8946 
8947   Optional<unsigned> FirstMaskArgument;
8948   if (Subtarget.hasVInstructions())
8949     FirstMaskArgument = preAssignMask(Outs);
8950 
8951   for (unsigned i = 0; i != NumArgs; i++) {
8952     MVT ArgVT = Outs[i].VT;
8953     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
8954     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8955 
8956     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8957     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8958            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
8959            FirstMaskArgument)) {
8960       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
8961                         << EVT(ArgVT).getEVTString() << "\n");
8962       llvm_unreachable(nullptr);
8963     }
8964   }
8965 }
8966 
8967 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8968 // values.
8969 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8970                                    const CCValAssign &VA, const SDLoc &DL,
8971                                    const RISCVSubtarget &Subtarget) {
8972   switch (VA.getLocInfo()) {
8973   default:
8974     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8975   case CCValAssign::Full:
8976     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
8977       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
8978     break;
8979   case CCValAssign::BCvt:
8980     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
8981       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
8982     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8983       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
8984     else
8985       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8986     break;
8987   }
8988   return Val;
8989 }
8990 
8991 // The caller is responsible for loading the full value if the argument is
8992 // passed with CCValAssign::Indirect.
8993 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8994                                 const CCValAssign &VA, const SDLoc &DL,
8995                                 const RISCVTargetLowering &TLI) {
8996   MachineFunction &MF = DAG.getMachineFunction();
8997   MachineRegisterInfo &RegInfo = MF.getRegInfo();
8998   EVT LocVT = VA.getLocVT();
8999   SDValue Val;
9000   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9001   Register VReg = RegInfo.createVirtualRegister(RC);
9002   RegInfo.addLiveIn(VA.getLocReg(), VReg);
9003   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9004 
9005   if (VA.getLocInfo() == CCValAssign::Indirect)
9006     return Val;
9007 
9008   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
9009 }
9010 
9011 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
9012                                    const CCValAssign &VA, const SDLoc &DL,
9013                                    const RISCVSubtarget &Subtarget) {
9014   EVT LocVT = VA.getLocVT();
9015 
9016   switch (VA.getLocInfo()) {
9017   default:
9018     llvm_unreachable("Unexpected CCValAssign::LocInfo");
9019   case CCValAssign::Full:
9020     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
9021       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
9022     break;
9023   case CCValAssign::BCvt:
9024     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
9025       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
9026     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9027       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
9028     else
9029       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9030     break;
9031   }
9032   return Val;
9033 }
9034 
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  // Materialize the incoming stack argument as an immutable fixed object.
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  // Every LocInfo handled here uses a plain (non-extending) load; the switch
  // exists so that an unexpected LocInfo is caught loudly.
  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
9070 
9071 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
9072                                        const CCValAssign &VA, const SDLoc &DL) {
9073   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9074          "Unexpected VA");
9075   MachineFunction &MF = DAG.getMachineFunction();
9076   MachineFrameInfo &MFI = MF.getFrameInfo();
9077   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9078 
9079   if (VA.isMemLoc()) {
9080     // f64 is passed on the stack.
9081     int FI =
9082         MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
9083     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9084     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
9085                        MachinePointerInfo::getFixedStack(MF, FI));
9086   }
9087 
9088   assert(VA.isRegLoc() && "Expected register VA assignment");
9089 
9090   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9091   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9092   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9093   SDValue Hi;
9094   if (VA.getLocReg() == RISCV::X17) {
9095     // Second half of f64 is passed on the stack.
9096     int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
9097     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9098     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9099                      MachinePointerInfo::getFixedStack(MF, FI));
9100   } else {
9101     // Second half of f64 is passed in another GPR.
9102     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
9103     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
9104     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9105   }
9106   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
9107 }
9108 
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
//
// A register-greedy convention: values are placed in the first free register
// of an extended list (the normal argument registers plus temporaries),
// falling back to the stack. Returns true when the value cannot be assigned.
static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            Optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No register was free: scalars fall back to a naturally-aligned stack slot.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
9212 
9213 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9214                          CCValAssign::LocInfo LocInfo,
9215                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
9216 
9217   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9218     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
9219     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
9220     static const MCPhysReg GPRList[] = {
9221         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
9222         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
9223     if (unsigned Reg = State.AllocateReg(GPRList)) {
9224       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9225       return false;
9226     }
9227   }
9228 
9229   if (LocVT == MVT::f32) {
9230     // Pass in STG registers: F1, ..., F6
9231     //                        fs0 ... fs5
9232     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
9233                                           RISCV::F18_F, RISCV::F19_F,
9234                                           RISCV::F20_F, RISCV::F21_F};
9235     if (unsigned Reg = State.AllocateReg(FPR32List)) {
9236       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9237       return false;
9238     }
9239   }
9240 
9241   if (LocVT == MVT::f64) {
9242     // Pass in STG registers: D1, ..., D6
9243     //                        fs6 ... fs11
9244     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
9245                                           RISCV::F24_D, RISCV::F25_D,
9246                                           RISCV::F26_D, RISCV::F27_D};
9247     if (unsigned Reg = State.AllocateReg(FPR64List)) {
9248       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9249       return false;
9250     }
9251   }
9252 
9253   report_fatal_error("No registers left in GHC calling convention");
9254   return true;
9255 }
9256 
// Transform physical registers into virtual registers: lower the incoming
// formal arguments described by Ins into DAG values appended to InVals.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // GHC passes f32/f64 arguments in FPRs unconditionally (see
    // CC_RISCV_GHC), so both the F and D extensions are required.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers cannot take arguments, and the attribute value must
    // be one of the three recognized privilege levels.
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                   : CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Consume all subsequent parts of the same original argument; note the
      // inner ++i so the outer loop skips over them.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        // Scalable-vector parts are laid out at vscale-scaled offsets.
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      // Save area sits just below the incoming stack arguments (negative
      // offset from the incoming stack pointer).
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR Value on the memory operand; these stores don't
      // correspond to any IR-level store.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
9412 
9413 /// isEligibleForTailCallOptimization - Check whether the call is eligible
9414 /// for tail call optimization.
9415 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
9416 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
9417     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9418     const SmallVector<CCValAssign, 16> &ArgLocs) const {
9419 
9420   auto &Callee = CLI.Callee;
9421   auto CalleeCC = CLI.CallConv;
9422   auto &Outs = CLI.Outs;
9423   auto &Caller = MF.getFunction();
9424   auto CallerCC = Caller.getCallingConv();
9425 
9426   // Exception-handling functions need a special set of instructions to
9427   // indicate a return to the hardware. Tail-calling another function would
9428   // probably break this.
9429   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
9430   // should be expanded as new function attributes are introduced.
9431   if (Caller.hasFnAttribute("interrupt"))
9432     return false;
9433 
9434   // Do not tail call opt if the stack is used to pass parameters.
9435   if (CCInfo.getNextStackOffset() != 0)
9436     return false;
9437 
9438   // Do not tail call opt if any parameters need to be passed indirectly.
9439   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
9440   // passed indirectly. So the address of the value will be passed in a
9441   // register, or if not available, then the address is put on the stack. In
9442   // order to pass indirectly, space on the stack often needs to be allocated
9443   // in order to store the value. In this case the CCInfo.getNextStackOffset()
9444   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
9445   // are passed CCValAssign::Indirect.
9446   for (auto &VA : ArgLocs)
9447     if (VA.getLocInfo() == CCValAssign::Indirect)
9448       return false;
9449 
9450   // Do not tail call opt if either caller or callee uses struct return
9451   // semantics.
9452   auto IsCallerStructRet = Caller.hasStructRetAttr();
9453   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9454   if (IsCallerStructRet || IsCalleeStructRet)
9455     return false;
9456 
9457   // Externally-defined functions with weak linkage should not be
9458   // tail-called. The behaviour of branch instructions in this situation (as
9459   // used for tail calls) is implementation-defined, so we cannot rely on the
9460   // linker replacing the tail call with a return.
9461   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
9462     const GlobalValue *GV = G->getGlobal();
9463     if (GV->hasExternalWeakLinkage())
9464       return false;
9465   }
9466 
9467   // The callee has to preserve all registers the caller needs to preserve.
9468   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9469   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9470   if (CalleeCC != CallerCC) {
9471     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9472     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9473       return false;
9474   }
9475 
9476   // Byval parameters hand the function a pointer directly into the stack area
9477   // we want to reuse during a tail call. Working around this *is* possible
9478   // but less efficient and uglier in LowerCall.
9479   for (auto &Arg : Outs)
9480     if (Arg.Flags.isByVal())
9481       return false;
9482 
9483   return true;
9484 }
9485 
9486 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
9487   return DAG.getDataLayout().getPrefTypeAlign(
9488       VT.getTypeForEVT(*DAG.getContext()));
9489 }
9490 
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                    : CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // Tail calls reuse the caller's call frame, so no callseq markers.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      // Split the f64 into two i32 halves and pass them as a GPR pair.
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info. Note the inner ++i: parts of the same original argument are
      // consumed here and skipped by the outer loop.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        // Scalable-vector parts live at vscale-scaled offsets.
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      // Store the first part at the base of the slot, then each collected
      // part at its recorded offset.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      // The address of the spill slot is what actually gets passed.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    // Non-dso-local callees go through the PLT.
    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    // A tail call terminates the sequence; there is no return value to read
    // and no callseq_end.
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // f64 returned in a GPR pair on RV32 soft-float ABIs: read the second
      // half from the next argument GPR and rebuild the f64.
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
9792 
9793 bool RISCVTargetLowering::CanLowerReturn(
9794     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9795     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
9796   SmallVector<CCValAssign, 16> RVLocs;
9797   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9798 
9799   Optional<unsigned> FirstMaskArgument;
9800   if (Subtarget.hasVInstructions())
9801     FirstMaskArgument = preAssignMask(Outs);
9802 
9803   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9804     MVT VT = Outs[i].VT;
9805     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
9806     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
9807     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
9808                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
9809                  *this, FirstMaskArgument))
9810       return false;
9811   }
9812   return true;
9813 }
9814 
// Lower an IR `ret` into copies to the ABI-designated return registers
// followed by the appropriate return node.
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI: split into two
      // i32 halves and return them in a GPR pair.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  unsigned RetOpc = RISCVISD::RET_FLAG;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select uret/sret/mret by the attribute's privilege level; anything
    // else was rejected in LowerFormalArguments, so "machine" is the
    // remaining case.
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
9914 
9915 void RISCVTargetLowering::validateCCReservedRegs(
9916     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
9917     MachineFunction &MF) const {
9918   const Function &F = MF.getFunction();
9919   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
9920 
9921   if (llvm::any_of(Regs, [&STI](auto Reg) {
9922         return STI.isRegisterReservedByUser(Reg.first);
9923       }))
9924     F.getContext().diagnose(DiagnosticInfoUnsupported{
9925         F, "Argument register required, but has been reserved."});
9926 }
9927 
9928 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
9929   return CI->isTailCall();
9930 }
9931 
/// Return the textual name of a RISCVISD opcode for debug dumps, or nullptr
/// for opcodes with no registered name (the generic printer then falls back
/// to its default formatting).
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expands to a `case` returning the stringified node name.
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(FCVT_X)
  NODE_NAME_CASE(FCVT_XU)
  NODE_NAME_CASE(FCVT_W_RV64)
  NODE_NAME_CASE(FCVT_WU_RV64)
  NODE_NAME_CASE(STRICT_FCVT_W_RV64)
  NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREV)
  NODE_NAME_CASE(GREVW)
  NODE_NAME_CASE(GORC)
  NODE_NAME_CASE(GORCW)
  NODE_NAME_CASE(SHFL)
  NODE_NAME_CASE(SHFLW)
  NODE_NAME_CASE(UNSHFL)
  NODE_NAME_CASE(UNSHFLW)
  NODE_NAME_CASE(BFP)
  NODE_NAME_CASE(BFPW)
  NODE_NAME_CASE(BCOMPRESS)
  NODE_NAME_CASE(BCOMPRESSW)
  NODE_NAME_CASE(BDECOMPRESS)
  NODE_NAME_CASE(BDECOMPRESSW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(SADDSAT_VL)
  NODE_NAME_CASE(UADDSAT_VL)
  NODE_NAME_CASE(SSUBSAT_VL)
  NODE_NAME_CASE(USUBSAT_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(FMINNUM_VL)
  NODE_NAME_CASE(FMAXNUM_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(VWMUL_VL)
  NODE_NAME_CASE(VWMULU_VL)
  NODE_NAME_CASE(VWADDU_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VCPOP_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
10083 
10084 /// getConstraintType - Given a constraint letter, return the type of
10085 /// constraint it is for this target.
10086 RISCVTargetLowering::ConstraintType
10087 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
10088   if (Constraint.size() == 1) {
10089     switch (Constraint[0]) {
10090     default:
10091       break;
10092     case 'f':
10093       return C_RegisterClass;
10094     case 'I':
10095     case 'J':
10096     case 'K':
10097       return C_Immediate;
10098     case 'A':
10099       return C_Memory;
10100     case 'S': // A symbolic address
10101       return C_Other;
10102     }
10103   } else {
10104     if (Constraint == "vr" || Constraint == "vm")
10105       return C_RegisterClass;
10106   }
10107   return TargetLowering::getConstraintType(Constraint);
10108 }
10109 
/// Map an inline-asm register constraint — single-letter class constraints
/// ('r', 'f'), RVV class constraints ("vr"/"vm"), and explicit register names
/// including their ABI aliases — to a (register, register class) pair.
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // Pick the FP register class matching VT, provided the corresponding
      // extension (Zfh/F/D) is enabled.
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  } else if (Constraint == "vr") {
    // Choose the smallest vector register grouping whose class can legally
    // hold VT.
    for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
                           &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // Translate the matched F-register index into the D/H register of the
      // same number when that width is requested (or VT is unspecified).
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
    }
  }

  if (Subtarget.hasVInstructions()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      // Try the mask class and single-register class first; otherwise walk
      // the larger register-group classes and map to the super-register that
      // contains VReg as its first sub-register.
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
10297 
10298 unsigned
10299 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
10300   // Currently only support length 1 constraints.
10301   if (ConstraintCode.size() == 1) {
10302     switch (ConstraintCode[0]) {
10303     case 'A':
10304       return InlineAsm::Constraint_A;
10305     default:
10306       break;
10307     }
10308   }
10309 
10310   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
10311 }
10312 
10313 void RISCVTargetLowering::LowerAsmOperandForConstraint(
10314     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
10315     SelectionDAG &DAG) const {
10316   // Currently only support length 1 constraints.
10317   if (Constraint.length() == 1) {
10318     switch (Constraint[0]) {
10319     case 'I':
10320       // Validate & create a 12-bit signed immediate operand.
10321       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10322         uint64_t CVal = C->getSExtValue();
10323         if (isInt<12>(CVal))
10324           Ops.push_back(
10325               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10326       }
10327       return;
10328     case 'J':
10329       // Validate & create an integer zero operand.
10330       if (auto *C = dyn_cast<ConstantSDNode>(Op))
10331         if (C->getZExtValue() == 0)
10332           Ops.push_back(
10333               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
10334       return;
10335     case 'K':
10336       // Validate & create a 5-bit unsigned immediate operand.
10337       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10338         uint64_t CVal = C->getZExtValue();
10339         if (isUInt<5>(CVal))
10340           Ops.push_back(
10341               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
10342       }
10343       return;
10344     case 'S':
10345       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
10346         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
10347                                                  GA->getValueType(0)));
10348       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
10349         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
10350                                                 BA->getValueType(0)));
10351       }
10352       return;
10353     default:
10354       break;
10355     }
10356   }
10357   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10358 }
10359 
10360 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
10361                                                    Instruction *Inst,
10362                                                    AtomicOrdering Ord) const {
10363   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
10364     return Builder.CreateFence(Ord);
10365   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
10366     return Builder.CreateFence(AtomicOrdering::Release);
10367   return nullptr;
10368 }
10369 
10370 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
10371                                                     Instruction *Inst,
10372                                                     AtomicOrdering Ord) const {
10373   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
10374     return Builder.CreateFence(AtomicOrdering::Acquire);
10375   return nullptr;
10376 }
10377 
10378 TargetLowering::AtomicExpansionKind
10379 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
10380   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
10381   // point operations can't be used in an lr/sc sequence without breaking the
10382   // forward-progress guarantee.
10383   if (AI->isFloatingPointOperation())
10384     return AtomicExpansionKind::CmpXChg;
10385 
10386   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10387   if (Size == 8 || Size == 16)
10388     return AtomicExpansionKind::MaskedIntrinsic;
10389   return AtomicExpansionKind::None;
10390 }
10391 
10392 static Intrinsic::ID
10393 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
10394   if (XLen == 32) {
10395     switch (BinOp) {
10396     default:
10397       llvm_unreachable("Unexpected AtomicRMW BinOp");
10398     case AtomicRMWInst::Xchg:
10399       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
10400     case AtomicRMWInst::Add:
10401       return Intrinsic::riscv_masked_atomicrmw_add_i32;
10402     case AtomicRMWInst::Sub:
10403       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
10404     case AtomicRMWInst::Nand:
10405       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
10406     case AtomicRMWInst::Max:
10407       return Intrinsic::riscv_masked_atomicrmw_max_i32;
10408     case AtomicRMWInst::Min:
10409       return Intrinsic::riscv_masked_atomicrmw_min_i32;
10410     case AtomicRMWInst::UMax:
10411       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
10412     case AtomicRMWInst::UMin:
10413       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
10414     }
10415   }
10416 
10417   if (XLen == 64) {
10418     switch (BinOp) {
10419     default:
10420       llvm_unreachable("Unexpected AtomicRMW BinOp");
10421     case AtomicRMWInst::Xchg:
10422       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
10423     case AtomicRMWInst::Add:
10424       return Intrinsic::riscv_masked_atomicrmw_add_i64;
10425     case AtomicRMWInst::Sub:
10426       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
10427     case AtomicRMWInst::Nand:
10428       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
10429     case AtomicRMWInst::Max:
10430       return Intrinsic::riscv_masked_atomicrmw_max_i64;
10431     case AtomicRMWInst::Min:
10432       return Intrinsic::riscv_masked_atomicrmw_min_i64;
10433     case AtomicRMWInst::UMax:
10434       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
10435     case AtomicRMWInst::UMin:
10436       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
10437     }
10438   }
10439 
10440   llvm_unreachable("Unexpected XLen\n");
10441 }
10442 
/// Build a call to the riscv_masked_atomicrmw_* intrinsic that implements a
/// sub-word atomicrmw. \p AlignedAddr is the containing aligned word,
/// \p Mask/\p ShiftAmt describe the position of the value within that word
/// (as produced by the generic masked-atomic expansion). Returns the
/// extracted old value.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an explicit XLen-wide integer.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 intrinsic variants take 64-bit operands: sign-extend the inputs.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // The intrinsic result is XLen wide; truncate back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
10485 
10486 TargetLowering::AtomicExpansionKind
10487 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
10488     AtomicCmpXchgInst *CI) const {
10489   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
10490   if (Size == 8 || Size == 16)
10491     return AtomicExpansionKind::MaskedIntrinsic;
10492   return AtomicExpansionKind::None;
10493 }
10494 
/// Build a call to the riscv_masked_cmpxchg intrinsic that implements a
/// sub-word cmpxchg on the aligned containing word under \p Mask. Returns
/// the loaded old value.
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an explicit XLen-wide integer.
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  // On RV64 the intrinsic takes i64 operands: sign-extend the inputs and
  // select the i64 variant.
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  // Truncate the XLen-wide result back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
10516 
// Never strip (sign/zero) extends from gather/scatter index operands on
// RISC-V; keep the index at its extended width.
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}
10520 
10521 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
10522                                                EVT VT) const {
10523   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
10524     return false;
10525 
10526   switch (FPVT.getSimpleVT().SimpleTy) {
10527   case MVT::f16:
10528     return Subtarget.hasStdExtZfh();
10529   case MVT::f32:
10530     return Subtarget.hasStdExtF();
10531   case MVT::f64:
10532     return Subtarget.hasStdExtD();
10533   default:
10534     return false;
10535   }
10536 }
10537 
10538 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
10539   // If we are using the small code model, we can reduce size of jump table
10540   // entry to 4 bytes.
10541   if (Subtarget.is64Bit() && !isPositionIndependent() &&
10542       getTargetMachine().getCodeModel() == CodeModel::Small) {
10543     return MachineJumpTableInfo::EK_Custom32;
10544   }
10545   return TargetLowering::getJumpTableEncoding();
10546 }
10547 
10548 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
10549     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
10550     unsigned uid, MCContext &Ctx) const {
10551   assert(Subtarget.is64Bit() && !isPositionIndependent() &&
10552          getTargetMachine().getCodeModel() == CodeModel::Small);
10553   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
10554 }
10555 
10556 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
10557                                                      EVT VT) const {
10558   VT = VT.getScalarType();
10559 
10560   if (!VT.isSimple())
10561     return false;
10562 
10563   switch (VT.getSimpleVT().SimpleTy) {
10564   case MVT::f16:
10565     return Subtarget.hasStdExtZfh();
10566   case MVT::f32:
10567     return Subtarget.hasStdExtF();
10568   case MVT::f64:
10569     return Subtarget.hasStdExtD();
10570   default:
10571     break;
10572   }
10573 
10574   return false;
10575 }
10576 
// The exception pointer is passed in x10 (ABI name a0; see the alias table in
// getRegForInlineAsmConstraint).
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
10581 
// The exception selector is passed in x11 (ABI name a1).
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
10586 
10587 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
10588   // Return false to suppress the unnecessary extensions if the LibCall
10589   // arguments or return value is f32 type for LP64 ABI.
10590   RISCVABI::ABI ABI = Subtarget.getTargetABI();
10591   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
10592     return false;
10593 
10594   return true;
10595 }
10596 
10597 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
10598   if (Subtarget.is64Bit() && Type == MVT::i32)
10599     return true;
10600 
10601   return IsSigned;
10602 }
10603 
10604 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
10605                                                  SDValue C) const {
10606   // Check integral scalar types.
10607   if (VT.isScalarInteger()) {
10608     // Omit the optimization if the sub target has the M extension and the data
10609     // size exceeds XLen.
10610     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
10611       return false;
10612     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10613       // Break the MUL to a SLLI and an ADD/SUB.
10614       const APInt &Imm = ConstNode->getAPIntValue();
10615       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10616           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10617         return true;
10618       // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
10619       if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
10620           ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
10621            (Imm - 8).isPowerOf2()))
10622         return true;
10623       // Omit the following optimization if the sub target has the M extension
10624       // and the data size >= XLen.
10625       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
10626         return false;
10627       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
10628       // a pair of LUI/ADDI.
10629       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
10630         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
10631         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
10632             (1 - ImmS).isPowerOf2())
10633         return true;
10634       }
10635     }
10636   }
10637 
10638   return false;
10639 }
10640 
10641 bool RISCVTargetLowering::isMulAddWithConstProfitable(
10642     const SDValue &AddNode, const SDValue &ConstNode) const {
10643   // Let the DAGCombiner decide for vectors.
10644   EVT VT = AddNode.getValueType();
10645   if (VT.isVector())
10646     return true;
10647 
10648   // Let the DAGCombiner decide for larger types.
10649   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
10650     return true;
10651 
10652   // It is worse if c1 is simm12 while c1*c2 is not.
10653   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
10654   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
10655   const APInt &C1 = C1Node->getAPIntValue();
10656   const APInt &C2 = C2Node->getAPIntValue();
10657   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
10658     return false;
10659 
10660   // Default to true and let the DAGCombiner decide.
10661   return true;
10662 }
10663 
10664 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
10665     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
10666     bool *Fast) const {
10667   if (!VT.isVector())
10668     return false;
10669 
10670   EVT ElemVT = VT.getVectorElementType();
10671   if (Alignment >= ElemVT.getStoreSize()) {
10672     if (Fast)
10673       *Fast = true;
10674     return true;
10675   }
10676 
10677   return false;
10678 }
10679 
// Split \p Val into \p NumParts registers of type \p PartVT for argument or
// return-value passing. Handles two RISCV-specific cases: NaN-boxing an f16
// into an f32 part, and widening a scalable vector into a larger scalable
// part via INSERT_SUBVECTOR. Returns true if handled here; false defers to
// the generic splitting logic.
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this is an ABI register copy.
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
    // and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Minimum (vscale = 1) bit sizes; only handle the case where the part
    // is a whole multiple of the value.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      // Give an example here, we want copy a <vscale x 1 x i8> value to
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
      // subvector, then we can bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          // Widen within the value's own element type first, so the final
          // bitcast to PartVT is bit-size exact.
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of element should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        // Same element type: a plain insert at index 0 into an undef PartVT
        // vector widens the value.
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
10733 
// Inverse of splitValueIntoRegisterParts: reassemble \p ValueVT from the
// register \p Parts. Handles un-NaN-boxing an f16 carried in an f32 part, and
// narrowing a scalable vector out of a larger scalable part via
// EXTRACT_SUBVECTOR. Returns an empty SDValue to defer to the generic logic.
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  // A present calling convention means this is an ABI register copy.
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Minimum (vscale = 1) bit sizes; only handle the case where the part
    // is a whole multiple of the value.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      // Give an example here, we want copy a <vscale x 1 x i8> value from
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
      // then we can extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Extract the requested narrower vector from index 0 of the part.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}
10778 
// Custom lowering of (sdiv X, pow2) / (sdiv X, -pow2) using a conditional
// bias: add (pow2 - 1) when X is negative, arithmetic-shift right, and negate
// for negative divisors. Returns an empty SDValue to fall back to the generic
// expansion, or SDValue(N, 0) to keep the SDIV as-is.
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  // If the function prefers size (or div is cheap), keep the real division.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
         "Unexpected divisor!");

  // Conditional move is needed, so do the transformation iff Zbt is enabled.
  if (!Subtarget.hasStdExtZbt())
    return SDValue();

  // When |Divisor| >= 2 ^ 12, it isn't profitable to do such transformation.
  // Besides, more critical path instructions will be generated when dividing
  // by 2. So we keep using the original DAGs for these cases.
  unsigned Lg2 = Divisor.countTrailingZeros();
  if (Lg2 == 1 || Lg2 >= 12)
    return SDValue();

  // fold (sdiv X, pow2)
  // Only handle i32, or i64 on RV64.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  // Safe since Lg2 < 12: the shift cannot overflow the constant.
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

  // Add (N0 < 0) ? Pow2 - 1 : 0;
  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  // Record the new nodes for the DAGCombiner's bookkeeping.
  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(Sel.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
10832 
10833 #define GET_REGISTER_MATCHER
10834 #include "RISCVGenAsmMatcher.inc"
10835 
10836 Register
10837 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
10838                                        const MachineFunction &MF) const {
10839   Register Reg = MatchRegisterAltName(RegName);
10840   if (Reg == RISCV::NoRegister)
10841     Reg = MatchRegisterName(RegName);
10842   if (Reg == RISCV::NoRegister)
10843     report_fatal_error(
10844         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
10845   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
10846   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
10847     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
10848                              StringRef(RegName) + "\"."));
10849   return Reg;
10850 }
10851 
namespace llvm {
namespace RISCVVIntrinsicsTable {

// Instantiate the TableGen-generated searchable table of RISC-V vector
// intrinsics (declared in the corresponding header).
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm
10861