1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/PatternMatch.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "riscv-lower"
44 
45 STATISTIC(NumTailCalls, "Number of tail calls");
46 
47 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
48                                          const RISCVSubtarget &STI)
49     : TargetLowering(TM), Subtarget(STI) {
50 
51   if (Subtarget.isRV32E())
52     report_fatal_error("Codegen not yet implemented for RV32E");
53 
54   RISCVABI::ABI ABI = Subtarget.getTargetABI();
55   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
56 
57   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
58       !Subtarget.hasStdExtF()) {
59     errs() << "Hard-float 'f' ABI can't be used for a target that "
60                 "doesn't support the F instruction set extension (ignoring "
61                           "target-abi)\n";
62     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
63   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
64              !Subtarget.hasStdExtD()) {
65     errs() << "Hard-float 'd' ABI can't be used for a target that "
66               "doesn't support the D instruction set extension (ignoring "
67               "target-abi)\n";
68     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
69   }
70 
71   switch (ABI) {
72   default:
73     report_fatal_error("Don't know how to lower this ABI");
74   case RISCVABI::ABI_ILP32:
75   case RISCVABI::ABI_ILP32F:
76   case RISCVABI::ABI_ILP32D:
77   case RISCVABI::ABI_LP64:
78   case RISCVABI::ABI_LP64F:
79   case RISCVABI::ABI_LP64D:
80     break;
81   }
82 
83   MVT XLenVT = Subtarget.getXLenVT();
84 
85   // Set up the register classes.
86   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
87 
88   if (Subtarget.hasStdExtZfh())
89     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
90   if (Subtarget.hasStdExtF())
91     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
92   if (Subtarget.hasStdExtD())
93     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
94 
95   static const MVT::SimpleValueType BoolVecVTs[] = {
96       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
97       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
98   static const MVT::SimpleValueType IntVecVTs[] = {
99       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
100       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
101       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
102       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
103       MVT::nxv4i64, MVT::nxv8i64};
104   static const MVT::SimpleValueType F16VecVTs[] = {
105       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
106       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
107   static const MVT::SimpleValueType F32VecVTs[] = {
108       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
109   static const MVT::SimpleValueType F64VecVTs[] = {
110       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
111 
112   if (Subtarget.hasStdExtV()) {
113     auto addRegClassForRVV = [this](MVT VT) {
114       unsigned Size = VT.getSizeInBits().getKnownMinValue();
115       assert(Size <= 512 && isPowerOf2_32(Size));
116       const TargetRegisterClass *RC;
117       if (Size <= 64)
118         RC = &RISCV::VRRegClass;
119       else if (Size == 128)
120         RC = &RISCV::VRM2RegClass;
121       else if (Size == 256)
122         RC = &RISCV::VRM4RegClass;
123       else
124         RC = &RISCV::VRM8RegClass;
125 
126       addRegisterClass(VT, RC);
127     };
128 
129     for (MVT VT : BoolVecVTs)
130       addRegClassForRVV(VT);
131     for (MVT VT : IntVecVTs)
132       addRegClassForRVV(VT);
133 
134     if (Subtarget.hasStdExtZfh())
135       for (MVT VT : F16VecVTs)
136         addRegClassForRVV(VT);
137 
138     if (Subtarget.hasStdExtF())
139       for (MVT VT : F32VecVTs)
140         addRegClassForRVV(VT);
141 
142     if (Subtarget.hasStdExtD())
143       for (MVT VT : F64VecVTs)
144         addRegClassForRVV(VT);
145 
146     if (Subtarget.useRVVForFixedLengthVectors()) {
147       auto addRegClassForFixedVectors = [this](MVT VT) {
148         MVT ContainerVT = getContainerForFixedLengthVector(VT);
149         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
150         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
151         addRegisterClass(VT, TRI.getRegClass(RCID));
152       };
153       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
154         if (useRVVForFixedLengthVectorVT(VT))
155           addRegClassForFixedVectors(VT);
156 
157       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
158         if (useRVVForFixedLengthVectorVT(VT))
159           addRegClassForFixedVectors(VT);
160     }
161   }
162 
163   // Compute derived properties from the register classes.
164   computeRegisterProperties(STI.getRegisterInfo());
165 
166   setStackPointerRegisterToSaveRestore(RISCV::X2);
167 
168   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
169     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
170 
171   // TODO: add all necessary setOperationAction calls.
172   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
173 
174   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
175   setOperationAction(ISD::BR_CC, XLenVT, Expand);
176   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
177   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
178 
179   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
180   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
181 
182   setOperationAction(ISD::VASTART, MVT::Other, Custom);
183   setOperationAction(ISD::VAARG, MVT::Other, Expand);
184   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
185   setOperationAction(ISD::VAEND, MVT::Other, Expand);
186 
187   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
188   if (!Subtarget.hasStdExtZbb()) {
189     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
190     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
191   }
192 
193   if (Subtarget.is64Bit()) {
194     setOperationAction(ISD::ADD, MVT::i32, Custom);
195     setOperationAction(ISD::SUB, MVT::i32, Custom);
196     setOperationAction(ISD::SHL, MVT::i32, Custom);
197     setOperationAction(ISD::SRA, MVT::i32, Custom);
198     setOperationAction(ISD::SRL, MVT::i32, Custom);
199 
200     setOperationAction(ISD::UADDO, MVT::i32, Custom);
201     setOperationAction(ISD::USUBO, MVT::i32, Custom);
202     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
203     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
204   } else {
205     setLibcallName(RTLIB::SHL_I128, nullptr);
206     setLibcallName(RTLIB::SRL_I128, nullptr);
207     setLibcallName(RTLIB::SRA_I128, nullptr);
208     setLibcallName(RTLIB::MUL_I128, nullptr);
209     setLibcallName(RTLIB::MULO_I64, nullptr);
210   }
211 
212   if (!Subtarget.hasStdExtM()) {
213     setOperationAction(ISD::MUL, XLenVT, Expand);
214     setOperationAction(ISD::MULHS, XLenVT, Expand);
215     setOperationAction(ISD::MULHU, XLenVT, Expand);
216     setOperationAction(ISD::SDIV, XLenVT, Expand);
217     setOperationAction(ISD::UDIV, XLenVT, Expand);
218     setOperationAction(ISD::SREM, XLenVT, Expand);
219     setOperationAction(ISD::UREM, XLenVT, Expand);
220   } else {
221     if (Subtarget.is64Bit()) {
222       setOperationAction(ISD::MUL, MVT::i32, Custom);
223       setOperationAction(ISD::MUL, MVT::i128, Custom);
224 
225       setOperationAction(ISD::SDIV, MVT::i8, Custom);
226       setOperationAction(ISD::UDIV, MVT::i8, Custom);
227       setOperationAction(ISD::UREM, MVT::i8, Custom);
228       setOperationAction(ISD::SDIV, MVT::i16, Custom);
229       setOperationAction(ISD::UDIV, MVT::i16, Custom);
230       setOperationAction(ISD::UREM, MVT::i16, Custom);
231       setOperationAction(ISD::SDIV, MVT::i32, Custom);
232       setOperationAction(ISD::UDIV, MVT::i32, Custom);
233       setOperationAction(ISD::UREM, MVT::i32, Custom);
234     } else {
235       setOperationAction(ISD::MUL, MVT::i64, Custom);
236     }
237   }
238 
239   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
240   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
241   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
242   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
243 
244   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
245   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
246   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
247 
248   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
249     if (Subtarget.is64Bit()) {
250       setOperationAction(ISD::ROTL, MVT::i32, Custom);
251       setOperationAction(ISD::ROTR, MVT::i32, Custom);
252     }
253   } else {
254     setOperationAction(ISD::ROTL, XLenVT, Expand);
255     setOperationAction(ISD::ROTR, XLenVT, Expand);
256   }
257 
258   if (Subtarget.hasStdExtZbp()) {
259     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
260     // more combining.
261     setOperationAction(ISD::BITREVERSE, XLenVT,   Custom);
262     setOperationAction(ISD::BSWAP,      XLenVT,   Custom);
263     setOperationAction(ISD::BITREVERSE, MVT::i8,  Custom);
264     // BSWAP i8 doesn't exist.
265     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
266     setOperationAction(ISD::BSWAP,      MVT::i16, Custom);
267 
268     if (Subtarget.is64Bit()) {
269       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
270       setOperationAction(ISD::BSWAP,      MVT::i32, Custom);
271     }
272   } else {
273     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
274     // pattern match it directly in isel.
275     setOperationAction(ISD::BSWAP, XLenVT,
276                        Subtarget.hasStdExtZbb() ? Legal : Expand);
277   }
278 
279   if (Subtarget.hasStdExtZbb()) {
280     setOperationAction(ISD::SMIN, XLenVT, Legal);
281     setOperationAction(ISD::SMAX, XLenVT, Legal);
282     setOperationAction(ISD::UMIN, XLenVT, Legal);
283     setOperationAction(ISD::UMAX, XLenVT, Legal);
284 
285     if (Subtarget.is64Bit()) {
286       setOperationAction(ISD::CTTZ, MVT::i32, Custom);
287       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
288       setOperationAction(ISD::CTLZ, MVT::i32, Custom);
289       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
290     }
291   } else {
292     setOperationAction(ISD::CTTZ, XLenVT, Expand);
293     setOperationAction(ISD::CTLZ, XLenVT, Expand);
294     setOperationAction(ISD::CTPOP, XLenVT, Expand);
295   }
296 
297   if (Subtarget.hasStdExtZbt()) {
298     setOperationAction(ISD::FSHL, XLenVT, Custom);
299     setOperationAction(ISD::FSHR, XLenVT, Custom);
300     setOperationAction(ISD::SELECT, XLenVT, Legal);
301 
302     if (Subtarget.is64Bit()) {
303       setOperationAction(ISD::FSHL, MVT::i32, Custom);
304       setOperationAction(ISD::FSHR, MVT::i32, Custom);
305     }
306   } else {
307     setOperationAction(ISD::SELECT, XLenVT, Custom);
308   }
309 
310   static const ISD::CondCode FPCCToExpand[] = {
311       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
312       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
313       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
314 
315   static const ISD::NodeType FPOpToExpand[] = {
316       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
317       ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
318 
319   if (Subtarget.hasStdExtZfh())
320     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
321 
322   if (Subtarget.hasStdExtZfh()) {
323     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
324     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
325     setOperationAction(ISD::LRINT, MVT::f16, Legal);
326     setOperationAction(ISD::LLRINT, MVT::f16, Legal);
327     setOperationAction(ISD::LROUND, MVT::f16, Legal);
328     setOperationAction(ISD::LLROUND, MVT::f16, Legal);
329     for (auto CC : FPCCToExpand)
330       setCondCodeAction(CC, MVT::f16, Expand);
331     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
332     setOperationAction(ISD::SELECT, MVT::f16, Custom);
333     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
334     for (auto Op : FPOpToExpand)
335       setOperationAction(Op, MVT::f16, Expand);
336   }
337 
338   if (Subtarget.hasStdExtF()) {
339     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
340     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
341     setOperationAction(ISD::LRINT, MVT::f32, Legal);
342     setOperationAction(ISD::LLRINT, MVT::f32, Legal);
343     setOperationAction(ISD::LROUND, MVT::f32, Legal);
344     setOperationAction(ISD::LLROUND, MVT::f32, Legal);
345     for (auto CC : FPCCToExpand)
346       setCondCodeAction(CC, MVT::f32, Expand);
347     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
348     setOperationAction(ISD::SELECT, MVT::f32, Custom);
349     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
350     for (auto Op : FPOpToExpand)
351       setOperationAction(Op, MVT::f32, Expand);
352     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
353     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
354   }
355 
356   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
357     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
358 
359   if (Subtarget.hasStdExtD()) {
360     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
361     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
362     setOperationAction(ISD::LRINT, MVT::f64, Legal);
363     setOperationAction(ISD::LLRINT, MVT::f64, Legal);
364     setOperationAction(ISD::LROUND, MVT::f64, Legal);
365     setOperationAction(ISD::LLROUND, MVT::f64, Legal);
366     for (auto CC : FPCCToExpand)
367       setCondCodeAction(CC, MVT::f64, Expand);
368     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
369     setOperationAction(ISD::SELECT, MVT::f64, Custom);
370     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
371     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
372     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
373     for (auto Op : FPOpToExpand)
374       setOperationAction(Op, MVT::f64, Expand);
375     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
376     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
377   }
378 
379   if (Subtarget.is64Bit()) {
380     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
381     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
382     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
383     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
384   }
385 
386   if (Subtarget.hasStdExtF()) {
387     setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
388     setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
389 
390     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
391     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
392   }
393 
394   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
395   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
396   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
397   setOperationAction(ISD::JumpTable, XLenVT, Custom);
398 
399   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
400 
401   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
402   // Unfortunately this can't be determined just from the ISA naming string.
403   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
404                      Subtarget.is64Bit() ? Legal : Custom);
405 
406   setOperationAction(ISD::TRAP, MVT::Other, Legal);
407   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
408   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
409   if (Subtarget.is64Bit())
410     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
411 
412   if (Subtarget.hasStdExtA()) {
413     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
414     setMinCmpXchgSizeInBits(32);
415   } else {
416     setMaxAtomicSizeInBitsSupported(0);
417   }
418 
419   setBooleanContents(ZeroOrOneBooleanContent);
420 
421   if (Subtarget.hasStdExtV()) {
422     setBooleanVectorContents(ZeroOrOneBooleanContent);
423 
424     setOperationAction(ISD::VSCALE, XLenVT, Custom);
425 
426     // RVV intrinsics may have illegal operands.
427     // We also need to custom legalize vmv.x.s.
428     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
429     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
430     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
431     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
432     if (Subtarget.is64Bit()) {
433       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
434     } else {
435       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
436       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
437     }
438 
439     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
440     setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
441 
442     static const unsigned IntegerVPOps[] = {
443         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
444         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
445         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
446         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
447         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
448         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
449         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
450 
451     static const unsigned FloatingPointVPOps[] = {
452         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
453         ISD::VP_FDIV,        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
454         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
455 
456     if (!Subtarget.is64Bit()) {
457       // We must custom-lower certain vXi64 operations on RV32 due to the vector
458       // element type being illegal.
459       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
460       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
461 
462       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
463       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
464       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
465       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
466       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
467       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
468       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
469       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
470 
471       setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
472       setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
473       setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
474       setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
475       setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
476       setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
477       setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
478       setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
479     }
480 
481     for (MVT VT : BoolVecVTs) {
482       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
483 
484       // Mask VTs are custom-expanded into a series of standard nodes
485       setOperationAction(ISD::TRUNCATE, VT, Custom);
486       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
487       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
488       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
489 
490       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
491       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
492 
493       setOperationAction(ISD::SELECT, VT, Custom);
494       setOperationAction(ISD::SELECT_CC, VT, Expand);
495       setOperationAction(ISD::VSELECT, VT, Expand);
496 
497       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
498       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
499       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
500 
501       setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
502       setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
503       setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
504 
505       // RVV has native int->float & float->int conversions where the
506       // element type sizes are within one power-of-two of each other. Any
507       // wider distances between type sizes have to be lowered as sequences
508       // which progressively narrow the gap in stages.
509       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
510       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
511       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
512       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
513 
514       // Expand all extending loads to types larger than this, and truncating
515       // stores from types larger than this.
516       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
517         setTruncStoreAction(OtherVT, VT, Expand);
518         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
519         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
520         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
521       }
522     }
523 
524     for (MVT VT : IntVecVTs) {
525       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
526       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
527 
528       setOperationAction(ISD::SMIN, VT, Legal);
529       setOperationAction(ISD::SMAX, VT, Legal);
530       setOperationAction(ISD::UMIN, VT, Legal);
531       setOperationAction(ISD::UMAX, VT, Legal);
532 
533       setOperationAction(ISD::ROTL, VT, Expand);
534       setOperationAction(ISD::ROTR, VT, Expand);
535 
536       // Custom-lower extensions and truncations from/to mask types.
537       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
538       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
539       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
540 
541       // RVV has native int->float & float->int conversions where the
542       // element type sizes are within one power-of-two of each other. Any
543       // wider distances between type sizes have to be lowered as sequences
544       // which progressively narrow the gap in stages.
545       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
546       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
547       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
548       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
549 
550       setOperationAction(ISD::SADDSAT, VT, Legal);
551       setOperationAction(ISD::UADDSAT, VT, Legal);
552       setOperationAction(ISD::SSUBSAT, VT, Legal);
553       setOperationAction(ISD::USUBSAT, VT, Legal);
554 
555       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
556       // nodes which truncate by one power of two at a time.
557       setOperationAction(ISD::TRUNCATE, VT, Custom);
558 
559       // Custom-lower insert/extract operations to simplify patterns.
560       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
561       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
562 
563       // Custom-lower reduction operations to set up the corresponding custom
564       // nodes' operands.
565       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
566       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
567       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
568       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
569       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
570       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
571       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
572       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
573 
574       for (unsigned VPOpc : IntegerVPOps)
575         setOperationAction(VPOpc, VT, Custom);
576 
577       setOperationAction(ISD::LOAD, VT, Custom);
578       setOperationAction(ISD::STORE, VT, Custom);
579 
580       setOperationAction(ISD::MLOAD, VT, Custom);
581       setOperationAction(ISD::MSTORE, VT, Custom);
582       setOperationAction(ISD::MGATHER, VT, Custom);
583       setOperationAction(ISD::MSCATTER, VT, Custom);
584 
585       setOperationAction(ISD::VP_LOAD, VT, Custom);
586       setOperationAction(ISD::VP_STORE, VT, Custom);
587       setOperationAction(ISD::VP_GATHER, VT, Custom);
588       setOperationAction(ISD::VP_SCATTER, VT, Custom);
589 
590       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
591       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
592       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
593 
594       setOperationAction(ISD::SELECT, VT, Custom);
595       setOperationAction(ISD::SELECT_CC, VT, Expand);
596 
597       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
598       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
599 
600       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
601         setTruncStoreAction(VT, OtherVT, Expand);
602         setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
603         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
604         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
605       }
606     }
607 
608     // Expand various CCs to best match the RVV ISA, which natively supports UNE
609     // but no other unordered comparisons, and supports all ordered comparisons
610     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
611     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
612     // and we pattern-match those back to the "original", swapping operands once
613     // more. This way we catch both operations and both "vf" and "fv" forms with
614     // fewer patterns.
615     static const ISD::CondCode VFPCCToExpand[] = {
616         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
617         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
618         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
619     };
620 
621     // Sets common operation actions on RVV floating-point vector types.
622     const auto SetCommonVFPActions = [&](MVT VT) {
623       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
624       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
625       // sizes are within one power-of-two of each other. Therefore conversions
626       // between vXf16 and vXf64 must be lowered as sequences which convert via
627       // vXf32.
628       setOperationAction(ISD::FP_ROUND, VT, Custom);
629       setOperationAction(ISD::FP_EXTEND, VT, Custom);
630       // Custom-lower insert/extract operations to simplify patterns.
631       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
632       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
633       // Expand various condition codes (explained above).
634       for (auto CC : VFPCCToExpand)
635         setCondCodeAction(CC, VT, Expand);
636 
637       setOperationAction(ISD::FMINNUM, VT, Legal);
638       setOperationAction(ISD::FMAXNUM, VT, Legal);
639 
640       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
641       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
642       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
643       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
644 
645       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
646 
647       setOperationAction(ISD::LOAD, VT, Custom);
648       setOperationAction(ISD::STORE, VT, Custom);
649 
650       setOperationAction(ISD::MLOAD, VT, Custom);
651       setOperationAction(ISD::MSTORE, VT, Custom);
652       setOperationAction(ISD::MGATHER, VT, Custom);
653       setOperationAction(ISD::MSCATTER, VT, Custom);
654 
655       setOperationAction(ISD::VP_LOAD, VT, Custom);
656       setOperationAction(ISD::VP_STORE, VT, Custom);
657       setOperationAction(ISD::VP_GATHER, VT, Custom);
658       setOperationAction(ISD::VP_SCATTER, VT, Custom);
659 
660       setOperationAction(ISD::SELECT, VT, Custom);
661       setOperationAction(ISD::SELECT_CC, VT, Expand);
662 
663       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
664       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
665       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
666 
667       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
668 
669       for (unsigned VPOpc : FloatingPointVPOps)
670         setOperationAction(VPOpc, VT, Custom);
671     };
672 
673     // Sets common extload/truncstore actions on RVV floating-point vector
674     // types.
675     const auto SetCommonVFPExtLoadTruncStoreActions =
676         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
677           for (auto SmallVT : SmallerVTs) {
678             setTruncStoreAction(VT, SmallVT, Expand);
679             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
680           }
681         };
682 
683     if (Subtarget.hasStdExtZfh())
684       for (MVT VT : F16VecVTs)
685         SetCommonVFPActions(VT);
686 
687     for (MVT VT : F32VecVTs) {
688       if (Subtarget.hasStdExtF())
689         SetCommonVFPActions(VT);
690       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
691     }
692 
693     for (MVT VT : F64VecVTs) {
694       if (Subtarget.hasStdExtD())
695         SetCommonVFPActions(VT);
696       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
697       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
698     }
699 
700     if (Subtarget.useRVVForFixedLengthVectors()) {
701       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
702         if (!useRVVForFixedLengthVectorVT(VT))
703           continue;
704 
705         // By default everything must be expanded.
706         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
707           setOperationAction(Op, VT, Expand);
708         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
709           setTruncStoreAction(VT, OtherVT, Expand);
710           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
711           setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
712           setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
713         }
714 
715         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
716         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
717         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
718 
719         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
720         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
721 
722         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
723         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
724 
725         setOperationAction(ISD::LOAD, VT, Custom);
726         setOperationAction(ISD::STORE, VT, Custom);
727 
728         setOperationAction(ISD::SETCC, VT, Custom);
729 
730         setOperationAction(ISD::SELECT, VT, Custom);
731 
732         setOperationAction(ISD::TRUNCATE, VT, Custom);
733 
734         setOperationAction(ISD::BITCAST, VT, Custom);
735 
736         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
737         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
738         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
739 
740         setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
741         setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
742         setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
743 
744         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
745         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
746         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
747         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
748 
749         // Operations below are different for between masks and other vectors.
750         if (VT.getVectorElementType() == MVT::i1) {
751           setOperationAction(ISD::AND, VT, Custom);
752           setOperationAction(ISD::OR, VT, Custom);
753           setOperationAction(ISD::XOR, VT, Custom);
754           continue;
755         }
756 
757         // Use SPLAT_VECTOR to prevent type legalization from destroying the
758         // splats when type legalizing i64 scalar on RV32.
759         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
760         // improvements first.
761         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
762           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
763           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
764         }
765 
766         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
767         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
768 
769         setOperationAction(ISD::MLOAD, VT, Custom);
770         setOperationAction(ISD::MSTORE, VT, Custom);
771         setOperationAction(ISD::MGATHER, VT, Custom);
772         setOperationAction(ISD::MSCATTER, VT, Custom);
773 
774         setOperationAction(ISD::VP_LOAD, VT, Custom);
775         setOperationAction(ISD::VP_STORE, VT, Custom);
776         setOperationAction(ISD::VP_GATHER, VT, Custom);
777         setOperationAction(ISD::VP_SCATTER, VT, Custom);
778 
779         setOperationAction(ISD::ADD, VT, Custom);
780         setOperationAction(ISD::MUL, VT, Custom);
781         setOperationAction(ISD::SUB, VT, Custom);
782         setOperationAction(ISD::AND, VT, Custom);
783         setOperationAction(ISD::OR, VT, Custom);
784         setOperationAction(ISD::XOR, VT, Custom);
785         setOperationAction(ISD::SDIV, VT, Custom);
786         setOperationAction(ISD::SREM, VT, Custom);
787         setOperationAction(ISD::UDIV, VT, Custom);
788         setOperationAction(ISD::UREM, VT, Custom);
789         setOperationAction(ISD::SHL, VT, Custom);
790         setOperationAction(ISD::SRA, VT, Custom);
791         setOperationAction(ISD::SRL, VT, Custom);
792 
793         setOperationAction(ISD::SMIN, VT, Custom);
794         setOperationAction(ISD::SMAX, VT, Custom);
795         setOperationAction(ISD::UMIN, VT, Custom);
796         setOperationAction(ISD::UMAX, VT, Custom);
797         setOperationAction(ISD::ABS,  VT, Custom);
798 
799         setOperationAction(ISD::MULHS, VT, Custom);
800         setOperationAction(ISD::MULHU, VT, Custom);
801 
802         setOperationAction(ISD::SADDSAT, VT, Custom);
803         setOperationAction(ISD::UADDSAT, VT, Custom);
804         setOperationAction(ISD::SSUBSAT, VT, Custom);
805         setOperationAction(ISD::USUBSAT, VT, Custom);
806 
807         setOperationAction(ISD::VSELECT, VT, Custom);
808         setOperationAction(ISD::SELECT_CC, VT, Expand);
809 
810         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
811         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
812         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
813 
814         // Custom-lower reduction operations to set up the corresponding custom
815         // nodes' operands.
816         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
817         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
818         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
819         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
820         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
821 
822         for (unsigned VPOpc : IntegerVPOps)
823           setOperationAction(VPOpc, VT, Custom);
824       }
825 
826       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
827         if (!useRVVForFixedLengthVectorVT(VT))
828           continue;
829 
830         // By default everything must be expanded.
831         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
832           setOperationAction(Op, VT, Expand);
833         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
834           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
835           setTruncStoreAction(VT, OtherVT, Expand);
836         }
837 
838         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
839         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
840         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
841 
842         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
843         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
844         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
845         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
846         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
847 
848         setOperationAction(ISD::LOAD, VT, Custom);
849         setOperationAction(ISD::STORE, VT, Custom);
850         setOperationAction(ISD::MLOAD, VT, Custom);
851         setOperationAction(ISD::MSTORE, VT, Custom);
852         setOperationAction(ISD::MGATHER, VT, Custom);
853         setOperationAction(ISD::MSCATTER, VT, Custom);
854 
855         setOperationAction(ISD::VP_LOAD, VT, Custom);
856         setOperationAction(ISD::VP_STORE, VT, Custom);
857         setOperationAction(ISD::VP_GATHER, VT, Custom);
858         setOperationAction(ISD::VP_SCATTER, VT, Custom);
859 
860         setOperationAction(ISD::FADD, VT, Custom);
861         setOperationAction(ISD::FSUB, VT, Custom);
862         setOperationAction(ISD::FMUL, VT, Custom);
863         setOperationAction(ISD::FDIV, VT, Custom);
864         setOperationAction(ISD::FNEG, VT, Custom);
865         setOperationAction(ISD::FABS, VT, Custom);
866         setOperationAction(ISD::FCOPYSIGN, VT, Custom);
867         setOperationAction(ISD::FSQRT, VT, Custom);
868         setOperationAction(ISD::FMA, VT, Custom);
869         setOperationAction(ISD::FMINNUM, VT, Custom);
870         setOperationAction(ISD::FMAXNUM, VT, Custom);
871 
872         setOperationAction(ISD::FP_ROUND, VT, Custom);
873         setOperationAction(ISD::FP_EXTEND, VT, Custom);
874 
875         for (auto CC : VFPCCToExpand)
876           setCondCodeAction(CC, VT, Expand);
877 
878         setOperationAction(ISD::VSELECT, VT, Custom);
879         setOperationAction(ISD::SELECT, VT, Custom);
880         setOperationAction(ISD::SELECT_CC, VT, Expand);
881 
882         setOperationAction(ISD::BITCAST, VT, Custom);
883 
884         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
885         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
886         setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
887         setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
888 
889         for (unsigned VPOpc : FloatingPointVPOps)
890           setOperationAction(VPOpc, VT, Custom);
891       }
892 
893       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
894       setOperationAction(ISD::BITCAST, MVT::i8, Custom);
895       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
896       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
897       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
898       setOperationAction(ISD::BITCAST, MVT::f16, Custom);
899       setOperationAction(ISD::BITCAST, MVT::f32, Custom);
900       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
901     }
902   }
903 
904   // Function alignments.
905   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
906   setMinFunctionAlignment(FunctionAlignment);
907   setPrefFunctionAlignment(FunctionAlignment);
908 
909   setMinimumJumpTableEntries(5);
910 
911   // Jumps are expensive, compared to logic
912   setJumpIsExpensive();
913 
914   // We can use any register for comparisons
915   setHasMultipleConditionRegisters();
916 
917   setTargetDAGCombine(ISD::ADD);
918   setTargetDAGCombine(ISD::SUB);
919   setTargetDAGCombine(ISD::AND);
920   setTargetDAGCombine(ISD::OR);
921   setTargetDAGCombine(ISD::XOR);
922   setTargetDAGCombine(ISD::ANY_EXTEND);
923   setTargetDAGCombine(ISD::ZERO_EXTEND);
924   if (Subtarget.hasStdExtV()) {
925     setTargetDAGCombine(ISD::FCOPYSIGN);
926     setTargetDAGCombine(ISD::MGATHER);
927     setTargetDAGCombine(ISD::MSCATTER);
928     setTargetDAGCombine(ISD::VP_GATHER);
929     setTargetDAGCombine(ISD::VP_SCATTER);
930     setTargetDAGCombine(ISD::SRA);
931     setTargetDAGCombine(ISD::SRL);
932     setTargetDAGCombine(ISD::SHL);
933     setTargetDAGCombine(ISD::STORE);
934   }
935 }
936 
937 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
938                                             LLVMContext &Context,
939                                             EVT VT) const {
940   if (!VT.isVector())
941     return getPointerTy(DL);
942   if (Subtarget.hasStdExtV() &&
943       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
944     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
945   return VT.changeVectorElementTypeToInteger();
946 }
947 
948 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
949   return Subtarget.getXLenVT();
950 }
951 
952 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
953                                              const CallInst &I,
954                                              MachineFunction &MF,
955                                              unsigned Intrinsic) const {
956   switch (Intrinsic) {
957   default:
958     return false;
959   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
960   case Intrinsic::riscv_masked_atomicrmw_add_i32:
961   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
962   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
963   case Intrinsic::riscv_masked_atomicrmw_max_i32:
964   case Intrinsic::riscv_masked_atomicrmw_min_i32:
965   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
966   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
967   case Intrinsic::riscv_masked_cmpxchg_i32: {
968     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
969     Info.opc = ISD::INTRINSIC_W_CHAIN;
970     Info.memVT = MVT::getVT(PtrTy->getElementType());
971     Info.ptrVal = I.getArgOperand(0);
972     Info.offset = 0;
973     Info.align = Align(4);
974     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
975                  MachineMemOperand::MOVolatile;
976     return true;
977   }
978   case Intrinsic::riscv_masked_strided_load:
979     Info.opc = ISD::INTRINSIC_W_CHAIN;
980     Info.ptrVal = I.getArgOperand(1);
981     Info.memVT = MVT::getVT(I.getType()->getScalarType());
982     Info.align = Align(I.getType()->getScalarSizeInBits() / 8);
983     Info.size = MemoryLocation::UnknownSize;
984     Info.flags |= MachineMemOperand::MOLoad;
985     return true;
986   case Intrinsic::riscv_masked_strided_store:
987     Info.opc = ISD::INTRINSIC_VOID;
988     Info.ptrVal = I.getArgOperand(1);
989     Info.memVT = MVT::getVT(I.getArgOperand(0)->getType()->getScalarType());
990     Info.align =
991         Align(I.getArgOperand(0)->getType()->getScalarSizeInBits() / 8);
992     Info.size = MemoryLocation::UnknownSize;
993     Info.flags |= MachineMemOperand::MOStore;
994     return true;
995   }
996 }
997 
998 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
999                                                 const AddrMode &AM, Type *Ty,
1000                                                 unsigned AS,
1001                                                 Instruction *I) const {
1002   // No global is ever allowed as a base.
1003   if (AM.BaseGV)
1004     return false;
1005 
1006   // Require a 12-bit signed offset.
1007   if (!isInt<12>(AM.BaseOffs))
1008     return false;
1009 
1010   switch (AM.Scale) {
1011   case 0: // "r+i" or just "i", depending on HasBaseReg.
1012     break;
1013   case 1:
1014     if (!AM.HasBaseReg) // allow "r+i".
1015       break;
1016     return false; // disallow "r+r" or "r+r+i".
1017   default:
1018     return false;
1019   }
1020 
1021   return true;
1022 }
1023 
1024 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1025   return isInt<12>(Imm);
1026 }
1027 
1028 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1029   return isInt<12>(Imm);
1030 }
1031 
1032 // On RV32, 64-bit integers are split into their high and low parts and held
1033 // in two different registers, so the trunc is free since the low register can
1034 // just be used.
1035 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1036   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1037     return false;
1038   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1039   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1040   return (SrcBits == 64 && DestBits == 32);
1041 }
1042 
1043 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1044   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1045       !SrcVT.isInteger() || !DstVT.isInteger())
1046     return false;
1047   unsigned SrcBits = SrcVT.getSizeInBits();
1048   unsigned DestBits = DstVT.getSizeInBits();
1049   return (SrcBits == 64 && DestBits == 32);
1050 }
1051 
1052 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1053   // Zexts are free if they can be combined with a load.
1054   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1055     EVT MemVT = LD->getMemoryVT();
1056     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
1057          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
1058         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1059          LD->getExtensionType() == ISD::ZEXTLOAD))
1060       return true;
1061   }
1062 
1063   return TargetLowering::isZExtFree(Val, VT2);
1064 }
1065 
1066 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1067   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1068 }
1069 
1070 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1071   return Subtarget.hasStdExtZbb();
1072 }
1073 
1074 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1075   return Subtarget.hasStdExtZbb();
1076 }
1077 
1078 /// Check if sinking \p I's operands to I's basic block is profitable, because
1079 /// the operands can be folded into a target instruction, e.g.
1080 /// splats of scalars can fold into vector instructions.
1081 bool RISCVTargetLowering::shouldSinkOperands(
1082     Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1083   using namespace llvm::PatternMatch;
1084 
1085   if (!I->getType()->isVectorTy() || !Subtarget.hasStdExtV())
1086     return false;
1087 
1088   auto IsSinker = [&](Instruction *I, int Operand) {
1089     switch (I->getOpcode()) {
1090     case Instruction::Add:
1091     case Instruction::Sub:
1092     case Instruction::Mul:
1093     case Instruction::And:
1094     case Instruction::Or:
1095     case Instruction::Xor:
1096     case Instruction::FAdd:
1097     case Instruction::FSub:
1098     case Instruction::FMul:
1099     case Instruction::FDiv:
1100       return true;
1101     case Instruction::Shl:
1102     case Instruction::LShr:
1103     case Instruction::AShr:
1104       return Operand == 1;
1105     case Instruction::Call:
1106       if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1107         switch (II->getIntrinsicID()) {
1108         case Intrinsic::fma:
1109           return Operand == 0 || Operand == 1;
1110         default:
1111           return false;
1112         }
1113       }
1114       return false;
1115     default:
1116       return false;
1117     }
1118   };
1119 
1120   for (auto OpIdx : enumerate(I->operands())) {
1121     if (!IsSinker(I, OpIdx.index()))
1122       continue;
1123 
1124     Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1125     // Make sure we are not already sinking this operand
1126     if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1127       continue;
1128 
1129     // We are looking for a splat that can be sunk.
1130     if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1131                              m_Undef(), m_ZeroMask())))
1132       continue;
1133 
1134     // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1135     // and vector registers
1136     for (Use &U : Op->uses()) {
1137       Instruction *Insn = cast<Instruction>(U.getUser());
1138       if (!IsSinker(Insn, U.getOperandNo()))
1139         return false;
1140     }
1141 
1142     Ops.push_back(&Op->getOperandUse(0));
1143     Ops.push_back(&OpIdx.value());
1144   }
1145   return true;
1146 }
1147 
1148 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1149                                        bool ForCodeSize) const {
1150   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1151     return false;
1152   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1153     return false;
1154   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1155     return false;
1156   if (Imm.isNegZero())
1157     return false;
1158   return Imm.isZero();
1159 }
1160 
1161 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1162   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1163          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1164          (VT == MVT::f64 && Subtarget.hasStdExtD());
1165 }
1166 
1167 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1168                                                       CallingConv::ID CC,
1169                                                       EVT VT) const {
1170   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1171   // end up using a GPR but that will be decided based on ABI.
1172   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1173     return MVT::f32;
1174 
1175   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1176 }
1177 
1178 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1179                                                            CallingConv::ID CC,
1180                                                            EVT VT) const {
1181   // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1182   // end up using a GPR but that will be decided based on ABI.
1183   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1184     return 1;
1185 
1186   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1187 }
1188 
1189 // Changes the condition code and swaps operands if necessary, so the SetCC
1190 // operation matches one of the comparisons supported directly by branches
1191 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1192 // with 1/-1.
1193 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1194                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1195   // Convert X > -1 to X >= 0.
1196   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1197     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1198     CC = ISD::SETGE;
1199     return;
1200   }
1201   // Convert X < 1 to 0 >= X.
1202   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1203     RHS = LHS;
1204     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1205     CC = ISD::SETGE;
1206     return;
1207   }
1208 
1209   switch (CC) {
1210   default:
1211     break;
1212   case ISD::SETGT:
1213   case ISD::SETLE:
1214   case ISD::SETUGT:
1215   case ISD::SETULE:
1216     CC = ISD::getSetCCSwappedOperands(CC);
1217     std::swap(LHS, RHS);
1218     break;
1219   }
1220 }
1221 
1222 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1223   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1224   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1225   if (VT.getVectorElementType() == MVT::i1)
1226     KnownSize *= 8;
1227 
1228   switch (KnownSize) {
1229   default:
1230     llvm_unreachable("Invalid LMUL.");
1231   case 8:
1232     return RISCVII::VLMUL::LMUL_F8;
1233   case 16:
1234     return RISCVII::VLMUL::LMUL_F4;
1235   case 32:
1236     return RISCVII::VLMUL::LMUL_F2;
1237   case 64:
1238     return RISCVII::VLMUL::LMUL_1;
1239   case 128:
1240     return RISCVII::VLMUL::LMUL_2;
1241   case 256:
1242     return RISCVII::VLMUL::LMUL_4;
1243   case 512:
1244     return RISCVII::VLMUL::LMUL_8;
1245   }
1246 }
1247 
1248 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1249   switch (LMul) {
1250   default:
1251     llvm_unreachable("Invalid LMUL.");
1252   case RISCVII::VLMUL::LMUL_F8:
1253   case RISCVII::VLMUL::LMUL_F4:
1254   case RISCVII::VLMUL::LMUL_F2:
1255   case RISCVII::VLMUL::LMUL_1:
1256     return RISCV::VRRegClassID;
1257   case RISCVII::VLMUL::LMUL_2:
1258     return RISCV::VRM2RegClassID;
1259   case RISCVII::VLMUL::LMUL_4:
1260     return RISCV::VRM4RegClassID;
1261   case RISCVII::VLMUL::LMUL_8:
1262     return RISCV::VRM8RegClassID;
1263   }
1264 }
1265 
1266 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1267   RISCVII::VLMUL LMUL = getLMUL(VT);
1268   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1269       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1270       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1271       LMUL == RISCVII::VLMUL::LMUL_1) {
1272     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1273                   "Unexpected subreg numbering");
1274     return RISCV::sub_vrm1_0 + Index;
1275   }
1276   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1277     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1278                   "Unexpected subreg numbering");
1279     return RISCV::sub_vrm2_0 + Index;
1280   }
1281   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1282     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1283                   "Unexpected subreg numbering");
1284     return RISCV::sub_vrm4_0 + Index;
1285   }
1286   llvm_unreachable("Invalid vector type.");
1287 }
1288 
1289 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1290   if (VT.getVectorElementType() == MVT::i1)
1291     return RISCV::VRRegClassID;
1292   return getRegClassIDForLMUL(getLMUL(VT));
1293 }
1294 
1295 // Attempt to decompose a subvector insert/extract between VecVT and
1296 // SubVecVT via subregister indices. Returns the subregister index that
1297 // can perform the subvector insert/extract with the given element index, as
1298 // well as the index corresponding to any leftover subvectors that must be
1299 // further inserted/extracted within the register class for SubVecVT.
1300 std::pair<unsigned, unsigned>
1301 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1302     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1303     const RISCVRegisterInfo *TRI) {
1304   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1305                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1306                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1307                 "Register classes not ordered");
1308   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1309   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1310   // Try to compose a subregister index that takes us from the incoming
1311   // LMUL>1 register class down to the outgoing one. At each step we halve
1312   // the LMUL:
1313   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1314   // Note that this is not guaranteed to find a subregister index, such as
1315   // when we are extracting from one VR type to another.
1316   unsigned SubRegIdx = RISCV::NoSubRegister;
1317   for (const unsigned RCID :
1318        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1319     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1320       VecVT = VecVT.getHalfNumVectorElementsVT();
1321       bool IsHi =
1322           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1323       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1324                                             getSubregIndexByMVT(VecVT, IsHi));
1325       if (IsHi)
1326         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1327     }
1328   return {SubRegIdx, InsertExtractIdx};
1329 }
1330 
1331 // Permit combining of mask vectors, since BUILD_VECTOR never expands to
1332 // scalar stores for those types.
1333 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1334   return !Subtarget.useRVVForFixedLengthVectors() ||
1335          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1336 }
1337 
1338 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1339   if (ScalarTy->isPointerTy())
1340     return true;
1341 
1342   if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1343       ScalarTy->isIntegerTy(32) || ScalarTy->isIntegerTy(64))
1344     return true;
1345 
1346   if (ScalarTy->isHalfTy())
1347     return Subtarget.hasStdExtZfh();
1348   if (ScalarTy->isFloatTy())
1349     return Subtarget.hasStdExtF();
1350   if (ScalarTy->isDoubleTy())
1351     return Subtarget.hasStdExtD();
1352 
1353   return false;
1354 }
1355 
1356 static bool useRVVForFixedLengthVectorVT(MVT VT,
1357                                          const RISCVSubtarget &Subtarget) {
1358   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1359   if (!Subtarget.useRVVForFixedLengthVectors())
1360     return false;
1361 
1362   // We only support a set of vector types with a consistent maximum fixed size
1363   // across all supported vector element types to avoid legalization issues.
1364   // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1365   // fixed-length vector type we support is 1024 bytes.
1366   if (VT.getFixedSizeInBits() > 1024 * 8)
1367     return false;
1368 
1369   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1370 
1371   MVT EltVT = VT.getVectorElementType();
1372 
1373   // Don't use RVV for vectors we cannot scalarize if required.
1374   switch (EltVT.SimpleTy) {
1375   // i1 is supported but has different rules.
1376   default:
1377     return false;
1378   case MVT::i1:
1379     // Masks can only use a single register.
1380     if (VT.getVectorNumElements() > MinVLen)
1381       return false;
1382     MinVLen /= 8;
1383     break;
1384   case MVT::i8:
1385   case MVT::i16:
1386   case MVT::i32:
1387   case MVT::i64:
1388     break;
1389   case MVT::f16:
1390     if (!Subtarget.hasStdExtZfh())
1391       return false;
1392     break;
1393   case MVT::f32:
1394     if (!Subtarget.hasStdExtF())
1395       return false;
1396     break;
1397   case MVT::f64:
1398     if (!Subtarget.hasStdExtD())
1399       return false;
1400     break;
1401   }
1402 
1403   // Reject elements larger than ELEN.
1404   if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1405     return false;
1406 
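  // As an illustration, with MinVLen=128 a v32i32 (1024-bit) vector requires
  // LMUL=8; anything needing more than the configured maximum LMUL is
  // rejected below.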
1407   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1408   // Don't use RVV for types that don't fit.
1409   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1410     return false;
1411 
1412   // TODO: Perhaps an artificial restriction, but worth having whilst getting
1413   // the base fixed length RVV support in place.
1414   if (!VT.isPow2VectorType())
1415     return false;
1416 
1417   return true;
1418 }
1419 
1420 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1421   return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1422 }
1423 
1424 // Return the largest legal scalable vector type that matches VT's element type.
1425 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
1426                                             const RISCVSubtarget &Subtarget) {
1427   // This may be called before legal types are set up.
1428   assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1429           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1430          "Expected legal fixed length vector!");
1431 
1432   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1433   unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1434 
1435   MVT EltVT = VT.getVectorElementType();
1436   switch (EltVT.SimpleTy) {
1437   default:
1438     llvm_unreachable("unexpected element type for RVV container");
1439   case MVT::i1:
1440   case MVT::i8:
1441   case MVT::i16:
1442   case MVT::i32:
1443   case MVT::i64:
1444   case MVT::f16:
1445   case MVT::f32:
1446   case MVT::f64: {
1447     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1448     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1449     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
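    // For example, assuming MinVLen=128 and ELEN=64: v8i32 (256 bits) is
    // contained in nxv4i32 (LMUL=2), while v2i8 is contained in nxv1i8
    // (fractional LMUL=1/8).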
1450     unsigned NumElts =
1451         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1452     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1453     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1454     return MVT::getScalableVectorVT(EltVT, NumElts);
1455   }
1456   }
1457 }
1458 
1459 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1460                                             const RISCVSubtarget &Subtarget) {
1461   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1462                                           Subtarget);
1463 }
1464 
1465 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1466   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
1467 }
1468 
1469 // Grow V to consume an entire RVV register.
1470 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1471                                        const RISCVSubtarget &Subtarget) {
1472   assert(VT.isScalableVector() &&
1473          "Expected to convert into a scalable vector!");
1474   assert(V.getValueType().isFixedLengthVector() &&
1475          "Expected a fixed length vector operand!");
1476   SDLoc DL(V);
1477   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1478   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1479 }
1480 
1481 // Shrink V so it's just big enough to maintain a VT's worth of data.
1482 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1483                                          const RISCVSubtarget &Subtarget) {
1484   assert(VT.isFixedLengthVector() &&
1485          "Expected to convert into a fixed length vector!");
1486   assert(V.getValueType().isScalableVector() &&
1487          "Expected a scalable vector operand!");
1488   SDLoc DL(V);
1489   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1490   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1491 }
1492 
1493 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1494 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1495 // the vector type that it is contained in.
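// For example, a fixed-length v4i32 contained in nxv2i32 (assuming a 128-bit
// minimum VLEN) yields VL=4 and an all-ones nxv2i1 mask, whereas a scalable
// VecVT yields the VLMaxSentinel constant as the VL.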
1496 static std::pair<SDValue, SDValue>
1497 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1498                 const RISCVSubtarget &Subtarget) {
1499   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1500   MVT XLenVT = Subtarget.getXLenVT();
1501   SDValue VL = VecVT.isFixedLengthVector()
1502                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1503                    : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1504   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1505   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1506   return {Mask, VL};
1507 }
1508 
1509 // As above but assuming the given type is a scalable vector type.
1510 static std::pair<SDValue, SDValue>
1511 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1512                         const RISCVSubtarget &Subtarget) {
1513   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1514   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1515 }
1516 
1517 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very little
1518 // of either is (currently) supported. This can get us into an infinite loop
1519 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1520 // as a ..., etc.
1521 // Until either (or both) of these can reliably lower any node, reporting that
1522 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1523 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1524 // which is not desirable.
1525 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1526     EVT VT, unsigned DefinedValues) const {
1527   return false;
1528 }
1529 
1530 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1531   // Only splats are currently supported.
1532   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1533     return true;
1534 
1535   return false;
1536 }
1537 
1538 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
1539   // RISCV FP-to-int conversions saturate to the destination register size, but
1540   // don't produce 0 for nan. We can use a conversion instruction and fix the
1541   // nan case with a compare and a select.
1542   SDValue Src = Op.getOperand(0);
1543 
1544   EVT DstVT = Op.getValueType();
1545   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1546 
1547   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1548   unsigned Opc;
1549   if (SatVT == DstVT)
1550     Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
1551   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1552     Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
1553   else
1554     return SDValue();
1555   // FIXME: Support other SatVTs by clamping before or after the conversion.
1556 
1557   SDLoc DL(Op);
1558   SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
1559 
1560   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1561   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1562 }
1563 
1564 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
1565                                  const RISCVSubtarget &Subtarget) {
1566   MVT VT = Op.getSimpleValueType();
1567   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1568 
1569   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1570 
1571   SDLoc DL(Op);
1572   SDValue Mask, VL;
1573   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1574 
1575   unsigned Opc =
1576       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
1577   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1578   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1579 }
1580 
1581 struct VIDSequence {
1582   int64_t StepNumerator;
1583   unsigned StepDenominator;
1584   int64_t Addend;
1585 };
1586 
1587 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1588 // to the (non-zero) step S and start value X. This can then be lowered as the
1589 // RVV sequence (VID * S) + X, for example.
1590 // The step S is represented as an integer numerator divided by a positive
1591 // denominator. Note that the implementation currently only identifies
1592 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1593 // cannot detect 2/3, for example.
1594 // Note that this method will also match potentially unappealing index
1595 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
1596 // determine whether this is worth generating code for.
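// For example, <1,3,5,7> yields {StepNumerator=2, StepDenominator=1, Addend=1}
// and <0,0,1,1> yields {StepNumerator=1, StepDenominator=2, Addend=0}.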
1597 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
1598   unsigned NumElts = Op.getNumOperands();
1599   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1600   if (!Op.getValueType().isInteger())
1601     return None;
1602 
1603   Optional<unsigned> SeqStepDenom;
1604   Optional<int64_t> SeqStepNum, SeqAddend;
1605   Optional<std::pair<uint64_t, unsigned>> PrevElt;
1606   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1607   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1608     // Assume undef elements match the sequence; we just have to be careful
1609     // when interpolating across them.
1610     if (Op.getOperand(Idx).isUndef())
1611       continue;
1612     // The BUILD_VECTOR must be all constants.
1613     if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1614       return None;
1615 
1616     uint64_t Val = Op.getConstantOperandVal(Idx) &
1617                    maskTrailingOnes<uint64_t>(EltSizeInBits);
1618 
1619     if (PrevElt) {
1620       // Calculate the step since the last non-undef element, and ensure
1621       // it's consistent across the entire sequence.
1622       unsigned IdxDiff = Idx - PrevElt->second;
1623       int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1624 
1625       // A zero value difference means that we're somewhere in the middle
1626       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1627       // step change before evaluating the sequence.
1628       if (ValDiff != 0) {
1629         int64_t Remainder = ValDiff % IdxDiff;
1630         // Normalize the step if it's greater than 1.
1631         if (Remainder != ValDiff) {
1632           // The difference must cleanly divide the element span.
1633           if (Remainder != 0)
1634             return None;
1635           ValDiff /= IdxDiff;
1636           IdxDiff = 1;
1637         }
1638 
1639         if (!SeqStepNum)
1640           SeqStepNum = ValDiff;
1641         else if (ValDiff != SeqStepNum)
1642           return None;
1643 
1644         if (!SeqStepDenom)
1645           SeqStepDenom = IdxDiff;
1646         else if (IdxDiff != *SeqStepDenom)
1647           return None;
1648       }
1649     }
1650 
1651     // Record and/or check any addend.
1652     if (SeqStepNum && SeqStepDenom) {
1653       uint64_t ExpectedVal =
1654           (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1655       int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1656       if (!SeqAddend)
1657         SeqAddend = Addend;
1658       else if (SeqAddend != Addend)
1659         return None;
1660     }
1661 
1662     // Record this non-undef element for later.
1663     if (!PrevElt || PrevElt->first != Val)
1664       PrevElt = std::make_pair(Val, Idx);
1665   }
1666   // We need to have logged both a step and an addend for this to count as
1667   // a legal index sequence.
1668   if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1669     return None;
1670 
1671   return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1672 }
1673 
1674 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1675                                  const RISCVSubtarget &Subtarget) {
1676   MVT VT = Op.getSimpleValueType();
1677   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1678 
1679   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1680 
1681   SDLoc DL(Op);
1682   SDValue Mask, VL;
1683   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1684 
1685   MVT XLenVT = Subtarget.getXLenVT();
1686   unsigned NumElts = Op.getNumOperands();
1687 
1688   if (VT.getVectorElementType() == MVT::i1) {
1689     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1690       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1691       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1692     }
1693 
1694     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1695       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1696       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1697     }
1698 
1699     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1700     // scalar integer chunks whose bit-width depends on the number of mask
1701     // bits and XLEN.
1702     // First, determine the most appropriate scalar integer type to use. This
1703     // is at most XLenVT, but may be shrunk to a smaller vector element type
1704     // according to the size of the final vector - use i8 chunks rather than
1705     // XLenVT if we're producing a v8i1. This results in more consistent
1706     // codegen across RV32 and RV64.
1707     unsigned NumViaIntegerBits =
1708         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1709     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1710       // If we have to use more than one INSERT_VECTOR_ELT then this
1711       // optimization is likely to increase code size; avoid performing it in
1712       // such a case. We can use a load from a constant pool in this case.
1713       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1714         return SDValue();
1715       // Now we can create our integer vector type. Note that it may be larger
1716       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1717       MVT IntegerViaVecVT =
1718           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1719                            divideCeil(NumElts, NumViaIntegerBits));
1720 
1721       uint64_t Bits = 0;
1722       unsigned BitPos = 0, IntegerEltIdx = 0;
1723       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1724 
1725       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1726         // Once we accumulate enough bits to fill our scalar type, insert into
1727         // our vector and clear our accumulated data.
1728         if (I != 0 && I % NumViaIntegerBits == 0) {
1729           if (NumViaIntegerBits <= 32)
1730             Bits = SignExtend64(Bits, 32);
1731           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1732           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1733                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1734           Bits = 0;
1735           BitPos = 0;
1736           IntegerEltIdx++;
1737         }
1738         SDValue V = Op.getOperand(I);
1739         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1740         Bits |= ((uint64_t)BitValue << BitPos);
1741       }
1742 
1743       // Insert the (remaining) scalar value into position in our integer
1744       // vector type.
1745       if (NumViaIntegerBits <= 32)
1746         Bits = SignExtend64(Bits, 32);
1747       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1748       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1749                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1750 
1751       if (NumElts < NumViaIntegerBits) {
1752         // If we're producing a smaller vector than our minimum legal integer
1753         // type, bitcast to the equivalent (known-legal) mask type, and extract
1754         // our final mask.
1755         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1756         Vec = DAG.getBitcast(MVT::v8i1, Vec);
1757         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1758                           DAG.getConstant(0, DL, XLenVT));
1759       } else {
1760         // Else we must have produced an integer type with the same size as the
1761         // mask type; bitcast for the final result.
1762         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1763         Vec = DAG.getBitcast(VT, Vec);
1764       }
1765 
1766       return Vec;
1767     }
1768 
1769     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1770     // vector type, we have a legal equivalently-sized i8 type, so we can use
1771     // that.
1772     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1773     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1774 
1775     SDValue WideVec;
1776     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1777       // For a splat, perform a scalar truncate before creating the wider
1778       // vector.
1779       assert(Splat.getValueType() == XLenVT &&
1780              "Unexpected type for i1 splat value");
1781       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1782                           DAG.getConstant(1, DL, XLenVT));
1783       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1784     } else {
1785       SmallVector<SDValue, 8> Ops(Op->op_values());
1786       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1787       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1788       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1789     }
1790 
1791     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1792   }
1793 
1794   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1795     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1796                                         : RISCVISD::VMV_V_X_VL;
1797     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1798     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1799   }
1800 
1801   // Try and match index sequences, which we can lower to the vid instruction
1802   // with optional modifications. An all-undef vector is matched by
1803   // getSplatValue, above.
1804   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
1805     int64_t StepNumerator = SimpleVID->StepNumerator;
1806     unsigned StepDenominator = SimpleVID->StepDenominator;
1807     int64_t Addend = SimpleVID->Addend;
1808     // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
1809     // threshold since it's the immediate value many RVV instructions accept.
1810     if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
1811         isInt<5>(Addend)) {
1812       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1813       // Convert right out of the scalable type so we can use standard ISD
1814       // nodes for the rest of the computation. If we used scalable types with
1815       // these, we'd lose the fixed-length vector info and generate worse
1816       // vsetvli code.
1817       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
1818       assert(StepNumerator != 0 && "Invalid step");
1819       bool Negate = false;
1820       if (StepNumerator != 1) {
1821         int64_t SplatStepVal = StepNumerator;
1822         unsigned Opcode = ISD::MUL;
1823         if (isPowerOf2_64(std::abs(StepNumerator))) {
1824           Negate = StepNumerator < 0;
1825           Opcode = ISD::SHL;
1826           SplatStepVal = Log2_64(std::abs(StepNumerator));
1827         }
1828         SDValue SplatStep = DAG.getSplatVector(
1829             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
1830         VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
1831       }
1832       if (StepDenominator != 1) {
1833         SDValue SplatStep = DAG.getSplatVector(
1834             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
1835         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
1836       }
1837       if (Addend != 0 || Negate) {
1838         SDValue SplatAddend =
1839             DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
1840         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
1841       }
1842       return VID;
1843     }
1844   }
1845 
1846   // Attempt to detect "hidden" splats, which only reveal themselves as splats
1847   // when re-interpreted as a vector with a larger element type. For example,
1848   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1849   // could be instead splat as
1850   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
1851   // TODO: This optimization could also work on non-constant splats, but it
1852   // would require bit-manipulation instructions to construct the splat value.
1853   SmallVector<SDValue> Sequence;
1854   unsigned EltBitSize = VT.getScalarSizeInBits();
1855   const auto *BV = cast<BuildVectorSDNode>(Op);
1856   if (VT.isInteger() && EltBitSize < 64 &&
1857       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
1858       BV->getRepeatedSequence(Sequence) &&
1859       (Sequence.size() * EltBitSize) <= 64) {
1860     unsigned SeqLen = Sequence.size();
1861     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1862     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1863     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1864             ViaIntVT == MVT::i64) &&
1865            "Unexpected sequence type");
1866 
1867     unsigned EltIdx = 0;
1868     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1869     uint64_t SplatValue = 0;
1870     // Construct the amalgamated value which can be splatted as this larger
1871     // vector type.
1872     for (const auto &SeqV : Sequence) {
1873       if (!SeqV.isUndef())
1874         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1875                        << (EltIdx * EltBitSize));
1876       EltIdx++;
1877     }
1878 
1879     // On RV64, sign-extend from 32 to 64 bits where possible in order to
1880     // achieve better constant materialization.
1881     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1882       SplatValue = SignExtend64(SplatValue, 32);
1883 
1884     // Since we can't introduce illegal i64 types at this stage, we can only
1885     // perform an i64 splat on RV32 if it is its own sign-extended value. That
1886     // way we can use RVV instructions to splat.
1887     assert((ViaIntVT.bitsLE(XLenVT) ||
1888             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1889            "Unexpected bitcast sequence");
1890     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1891       SDValue ViaVL =
1892           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1893       MVT ViaContainerVT =
1894           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1895       SDValue Splat =
1896           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1897                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1898       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1899       return DAG.getBitcast(VT, Splat);
1900     }
1901   }
1902 
1903   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1904   // which constitute a large proportion of the elements. In such cases we can
1905   // splat a vector with the dominant element and make up the shortfall with
1906   // INSERT_VECTOR_ELTs.
1907   // Note that this includes vectors of 2 elements by association. The
1908   // upper-most element is the "dominant" one, allowing us to use a splat to
1909   // "insert" the upper element, and an insert of the lower element at position
1910   // 0, which improves codegen.
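  // For example, <3,3,3,7> would be lowered by splatting 3 and then inserting
  // 7 at index 3, rather than inserting all four elements individually.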
1911   SDValue DominantValue;
1912   unsigned MostCommonCount = 0;
1913   DenseMap<SDValue, unsigned> ValueCounts;
1914   unsigned NumUndefElts =
1915       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1916 
1917   // Track the number of scalar loads we know we'd be inserting, estimated as
1918   // any non-zero floating-point constant. Other kinds of element are either
1919   // already in registers or are materialized on demand. The threshold at which
1920   // a vector load is more desirable than several scalar materialization and
1921   // vector-insertion instructions is not known.
1922   unsigned NumScalarLoads = 0;
1923 
1924   for (SDValue V : Op->op_values()) {
1925     if (V.isUndef())
1926       continue;
1927 
1928     ValueCounts.insert(std::make_pair(V, 0));
1929     unsigned &Count = ValueCounts[V];
1930 
1931     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
1932       NumScalarLoads += !CFP->isExactlyValue(+0.0);
1933 
1934     // Is this value dominant? In case of a tie, prefer the highest element as
1935     // it's cheaper to insert near the beginning of a vector than it is at the
1936     // end.
1937     if (++Count >= MostCommonCount) {
1938       DominantValue = V;
1939       MostCommonCount = Count;
1940     }
1941   }
1942 
1943   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1944   unsigned NumDefElts = NumElts - NumUndefElts;
1945   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1946 
1947   // Don't perform this optimization when optimizing for size, since
1948   // materializing elements and inserting them tends to cause code bloat.
1949   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
1950       ((MostCommonCount > DominantValueCountThreshold) ||
1951        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1952     // Start by splatting the most common element.
1953     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1954 
1955     DenseSet<SDValue> Processed{DominantValue};
1956     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1957     for (const auto &OpIdx : enumerate(Op->ops())) {
1958       const SDValue &V = OpIdx.value();
1959       if (V.isUndef() || !Processed.insert(V).second)
1960         continue;
1961       if (ValueCounts[V] == 1) {
1962         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1963                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
1964       } else {
1965         // Blend in all instances of this value using a VSELECT, using a
1966         // mask where each bit signals whether that element is the one
1967         // we're after.
1968         SmallVector<SDValue> Ops;
1969         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1970           return DAG.getConstant(V == V1, DL, XLenVT);
1971         });
1972         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1973                           DAG.getBuildVector(SelMaskTy, DL, Ops),
1974                           DAG.getSplatBuildVector(VT, DL, V), Vec);
1975       }
1976     }
1977 
1978     return Vec;
1979   }
1980 
1981   return SDValue();
1982 }
1983 
1984 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
1985                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1986   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1987     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1988     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1989     // If Hi is just Lo's sign bit replicated, lower this as a custom node in
1990     // order to try and match RVV vector/scalar instructions.
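    // For example, Lo=-1/Hi=-1 and Lo=5/Hi=0 both qualify, splatting the
    // 64-bit values -1 and 5 directly from Lo.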
1991     if ((LoC >> 31) == HiC)
1992       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1993   }
1994 
1995   // Fall back to a stack store and stride x0 vector load.
1996   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
1997 }
1998 
1999 // Called by type legalization to handle splat of i64 on RV32.
2000 // FIXME: We can optimize this when the type has sign or zero bits in one
2001 // of the halves.
2002 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2003                                    SDValue VL, SelectionDAG &DAG) {
2004   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2005   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2006                            DAG.getConstant(0, DL, MVT::i32));
2007   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2008                            DAG.getConstant(1, DL, MVT::i32));
2009   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2010 }
2011 
2012 // This function lowers a splat of the scalar operand Scalar with the vector
2013 // length VL. It ensures the final sequence is type legal, which is useful when
2014 // lowering a splat after type legalization.
2015 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
2016                                 SelectionDAG &DAG,
2017                                 const RISCVSubtarget &Subtarget) {
2018   if (VT.isFloatingPoint())
2019     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
2020 
2021   MVT XLenVT = Subtarget.getXLenVT();
2022 
2023   // Simplest case is that the operand needs to be promoted to XLenVT.
2024   if (Scalar.getValueType().bitsLE(XLenVT)) {
2025     // If the operand is a constant, sign extend to increase our chances
2026     // of being able to use a .vi instruction. ANY_EXTEND would become a
2027     // zero extend and the simm5 check in isel would fail.
2028     // FIXME: Should we ignore the upper bits in isel instead?
2029     unsigned ExtOpc =
2030         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2031     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2032     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
2033   }
2034 
2035   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2036          "Unexpected scalar for splat lowering!");
2037 
2038   // Otherwise use the more complicated splatting algorithm.
2039   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2040 }
2041 
2042 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2043                                    const RISCVSubtarget &Subtarget) {
2044   SDValue V1 = Op.getOperand(0);
2045   SDValue V2 = Op.getOperand(1);
2046   SDLoc DL(Op);
2047   MVT XLenVT = Subtarget.getXLenVT();
2048   MVT VT = Op.getSimpleValueType();
2049   unsigned NumElts = VT.getVectorNumElements();
2050   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2051 
2052   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2053 
2054   SDValue TrueMask, VL;
2055   std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2056 
2057   if (SVN->isSplat()) {
2058     const int Lane = SVN->getSplatIndex();
2059     if (Lane >= 0) {
2060       MVT SVT = VT.getVectorElementType();
2061 
2062       // Turn splatted vector load into a strided load with an X0 stride.
2063       SDValue V = V1;
2064       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2065       // with undef.
2066       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2067       int Offset = Lane;
2068       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2069         int OpElements =
2070             V.getOperand(0).getSimpleValueType().getVectorNumElements();
2071         V = V.getOperand(Offset / OpElements);
2072         Offset %= OpElements;
2073       }
2074 
2075       // We need to ensure the load isn't atomic or volatile.
2076       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2077         auto *Ld = cast<LoadSDNode>(V);
2078         Offset *= SVT.getStoreSize();
2079         SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2080                                                    TypeSize::Fixed(Offset), DL);
2081 
2082         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2083         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2084           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2085           SDValue IntID =
2086               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2087           SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
2088                            DAG.getRegister(RISCV::X0, XLenVT), VL};
2089           SDValue NewLoad = DAG.getMemIntrinsicNode(
2090               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2091               DAG.getMachineFunction().getMachineMemOperand(
2092                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2093           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2094           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2095         }
2096 
2097         // Otherwise use a scalar load and splat. This will give the best
2098         // opportunity to fold a splat into the operation. ISel can turn it into
2099         // the x0 strided load if we aren't able to fold away the select.
2100         if (SVT.isFloatingPoint())
2101           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2102                           Ld->getPointerInfo().getWithOffset(Offset),
2103                           Ld->getOriginalAlign(),
2104                           Ld->getMemOperand()->getFlags());
2105         else
2106           V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2107                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
2108                              Ld->getOriginalAlign(),
2109                              Ld->getMemOperand()->getFlags());
2110         DAG.makeEquivalentMemoryOrdering(Ld, V);
2111 
2112         unsigned Opc =
2113             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2114         SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2115         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2116       }
2117 
2118       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2119       assert(Lane < (int)NumElts && "Unexpected lane!");
2120       SDValue Gather =
2121           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2122                       DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2123       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2124     }
2125   }
2126 
2127   // Detect shuffles which can be re-expressed as vector selects; these are
2128   // shuffles in which each element in the destination is taken from an element
2129   // at the corresponding index in either source vectors.
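  // For example, with 4-element inputs the mask <0, 5, 2, 7> picks
  // {V1[0], V2[1], V1[2], V2[3]} and can be lowered directly as a VSELECT.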
2130   bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
2131     int MaskIndex = MaskIdx.value();
2132     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2133   });
2134 
2135   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2136 
2137   SmallVector<SDValue> MaskVals;
2138   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2139   // merged with a second vrgather.
2140   SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2141 
2142   // By default we preserve the original operand order, and use a mask to
2143   // select LHS as true and RHS as false. However, since RVV vector selects may
2144   // feature splats but only on the LHS, we may choose to invert our mask and
2145   // instead select between RHS and LHS.
2146   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2147   bool InvertMask = IsSelect == SwapOps;
2148 
2149   // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
2150   // half.
2151   DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2152 
2153   // Now construct the mask that will be used by the vselect or blended
2154   // vrgather operation. For vrgathers, construct the appropriate indices into
2155   // each vector.
2156   for (int MaskIndex : SVN->getMask()) {
2157     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2158     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2159     if (!IsSelect) {
2160       bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2161       GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2162                                      ? DAG.getConstant(MaskIndex, DL, XLenVT)
2163                                      : DAG.getUNDEF(XLenVT));
2164       GatherIndicesRHS.push_back(
2165           IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2166                             : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2167       if (IsLHSOrUndefIndex && MaskIndex >= 0)
2168         ++LHSIndexCounts[MaskIndex];
2169       if (!IsLHSOrUndefIndex)
2170         ++RHSIndexCounts[MaskIndex - NumElts];
2171     }
2172   }
2173 
2174   if (SwapOps) {
2175     std::swap(V1, V2);
2176     std::swap(GatherIndicesLHS, GatherIndicesRHS);
2177   }
2178 
2179   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2180   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2181   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2182 
2183   if (IsSelect)
2184     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2185 
2186   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2187     // On such a large vector we're unable to use i8 as the index type.
2188     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2189     // may involve vector splitting if we're already at LMUL=8, or our
2190     // user-supplied maximum fixed-length LMUL.
2191     return SDValue();
2192   }
2193 
2194   unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2195   unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2196   MVT IndexVT = VT.changeTypeToInteger();
2197   // Since we can't introduce illegal index types at this stage, use i16 and
2198   // vrgatherei16 if the corresponding index type for plain vrgather is greater
2199   // than XLenVT.
2200   if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2201     GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2202     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2203   }
2204 
2205   MVT IndexContainerVT =
2206       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2207 
2208   SDValue Gather;
2209   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2210   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2211   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2212     Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2213   } else {
2214     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2215     // If only one index is used, we can use a "splat" vrgather.
2216     // TODO: We can splat the most-common index and fix-up any stragglers, if
2217     // that's beneficial.
2218     if (LHSIndexCounts.size() == 1) {
2219       int SplatIndex = LHSIndexCounts.begin()->getFirst();
2220       Gather =
2221           DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2222                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2223     } else {
2224       SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2225       LHSIndices =
2226           convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2227 
2228       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2229                            TrueMask, VL);
2230     }
2231   }
2232 
2233   // If a second vector operand is used by this shuffle, blend it in with an
2234   // additional vrgather.
2235   if (!V2.isUndef()) {
2236     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2237     // If only one index is used, we can use a "splat" vrgather.
2238     // TODO: We can splat the most-common index and fix-up any stragglers, if
2239     // that's beneficial.
2240     if (RHSIndexCounts.size() == 1) {
2241       int SplatIndex = RHSIndexCounts.begin()->getFirst();
2242       V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2243                        DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2244     } else {
2245       SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2246       RHSIndices =
2247           convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2248       V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2249                        VL);
2250     }
2251 
2252     MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2253     SelectMask =
2254         convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2255 
2256     Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2257                          Gather, VL);
2258   }
2259 
2260   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2261 }
2262 
2263 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2264                                      SDLoc DL, SelectionDAG &DAG,
2265                                      const RISCVSubtarget &Subtarget) {
2266   if (VT.isScalableVector())
2267     return DAG.getFPExtendOrRound(Op, DL, VT);
2268   assert(VT.isFixedLengthVector() &&
2269          "Unexpected value type for RVV FP extend/round lowering");
2270   SDValue Mask, VL;
2271   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2272   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2273                         ? RISCVISD::FP_EXTEND_VL
2274                         : RISCVISD::FP_ROUND_VL;
2275   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2276 }
2277 
2278 // While RVV has alignment restrictions, we should always be able to load as a
2279 // legal equivalently-sized byte-typed vector instead. This method is
2280 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2281 // the load is already correctly-aligned, it returns SDValue().
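// For example, an underaligned v8i16 load is performed as a v16i8 load and the
// result is bitcast back to v8i16.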
2282 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2283                                                     SelectionDAG &DAG) const {
2284   auto *Load = cast<LoadSDNode>(Op);
2285   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2286 
2287   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2288                                      Load->getMemoryVT(),
2289                                      *Load->getMemOperand()))
2290     return SDValue();
2291 
2292   SDLoc DL(Op);
2293   MVT VT = Op.getSimpleValueType();
2294   unsigned EltSizeBits = VT.getScalarSizeInBits();
2295   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2296          "Unexpected unaligned RVV load type");
2297   MVT NewVT =
2298       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2299   assert(NewVT.isValid() &&
2300          "Expecting equally-sized RVV vector types to be legal");
2301   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2302                           Load->getPointerInfo(), Load->getOriginalAlign(),
2303                           Load->getMemOperand()->getFlags());
2304   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2305 }
2306 
2307 // While RVV has alignment restrictions, we should always be able to store as a
2308 // legal equivalently-sized byte-typed vector instead. This method is
2309 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2310 // returns SDValue() if the store is already correctly aligned.
2311 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2312                                                      SelectionDAG &DAG) const {
2313   auto *Store = cast<StoreSDNode>(Op);
2314   assert(Store && Store->getValue().getValueType().isVector() &&
2315          "Expected vector store");
2316 
2317   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2318                                      Store->getMemoryVT(),
2319                                      *Store->getMemOperand()))
2320     return SDValue();
2321 
2322   SDLoc DL(Op);
2323   SDValue StoredVal = Store->getValue();
2324   MVT VT = StoredVal.getSimpleValueType();
2325   unsigned EltSizeBits = VT.getScalarSizeInBits();
2326   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2327          "Unexpected unaligned RVV store type");
2328   MVT NewVT =
2329       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2330   assert(NewVT.isValid() &&
2331          "Expecting equally-sized RVV vector types to be legal");
2332   StoredVal = DAG.getBitcast(NewVT, StoredVal);
2333   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2334                       Store->getPointerInfo(), Store->getOriginalAlign(),
2335                       Store->getMemOperand()->getFlags());
2336 }
2337 
2338 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2339                                             SelectionDAG &DAG) const {
2340   switch (Op.getOpcode()) {
2341   default:
2342     report_fatal_error("unimplemented operand");
2343   case ISD::GlobalAddress:
2344     return lowerGlobalAddress(Op, DAG);
2345   case ISD::BlockAddress:
2346     return lowerBlockAddress(Op, DAG);
2347   case ISD::ConstantPool:
2348     return lowerConstantPool(Op, DAG);
2349   case ISD::JumpTable:
2350     return lowerJumpTable(Op, DAG);
2351   case ISD::GlobalTLSAddress:
2352     return lowerGlobalTLSAddress(Op, DAG);
2353   case ISD::SELECT:
2354     return lowerSELECT(Op, DAG);
2355   case ISD::BRCOND:
2356     return lowerBRCOND(Op, DAG);
2357   case ISD::VASTART:
2358     return lowerVASTART(Op, DAG);
2359   case ISD::FRAMEADDR:
2360     return lowerFRAMEADDR(Op, DAG);
2361   case ISD::RETURNADDR:
2362     return lowerRETURNADDR(Op, DAG);
2363   case ISD::SHL_PARTS:
2364     return lowerShiftLeftParts(Op, DAG);
2365   case ISD::SRA_PARTS:
2366     return lowerShiftRightParts(Op, DAG, true);
2367   case ISD::SRL_PARTS:
2368     return lowerShiftRightParts(Op, DAG, false);
2369   case ISD::BITCAST: {
2370     SDLoc DL(Op);
2371     EVT VT = Op.getValueType();
2372     SDValue Op0 = Op.getOperand(0);
2373     EVT Op0VT = Op0.getValueType();
2374     MVT XLenVT = Subtarget.getXLenVT();
2375     if (VT.isFixedLengthVector()) {
2376       // We can handle fixed length vector bitcasts with a simple replacement
2377       // in isel.
2378       if (Op0VT.isFixedLengthVector())
2379         return Op;
2380       // When bitcasting from scalar to fixed-length vector, insert the scalar
2381       // into a one-element vector of the result type, and perform a vector
2382       // bitcast.
2383       if (!Op0VT.isVector()) {
2384         auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2385         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2386                                               DAG.getUNDEF(BVT), Op0,
2387                                               DAG.getConstant(0, DL, XLenVT)));
2388       }
2389       return SDValue();
2390     }
2391     // Custom-legalize bitcasts from fixed-length vector types to scalar types
2392     // thus: bitcast the vector to a one-element vector type whose element type
2393     // is the same as the result type, and extract the first element.
2394     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2395       LLVMContext &Context = *DAG.getContext();
2396       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
2397       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2398                          DAG.getConstant(0, DL, XLenVT));
2399     }
2400     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2401       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2402       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2403       return FPConv;
2404     }
2405     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2406         Subtarget.hasStdExtF()) {
2407       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2408       SDValue FPConv =
2409           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2410       return FPConv;
2411     }
2412     return SDValue();
2413   }
2414   case ISD::INTRINSIC_WO_CHAIN:
2415     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2416   case ISD::INTRINSIC_W_CHAIN:
2417     return LowerINTRINSIC_W_CHAIN(Op, DAG);
2418   case ISD::INTRINSIC_VOID:
2419     return LowerINTRINSIC_VOID(Op, DAG);
2420   case ISD::BSWAP:
2421   case ISD::BITREVERSE: {
2422     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2423     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2424     MVT VT = Op.getSimpleValueType();
2425     SDLoc DL(Op);
2426     // Start with the maximum immediate value which is the bitwidth - 1.
2427     unsigned Imm = VT.getSizeInBits() - 1;
2428     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2429     if (Op.getOpcode() == ISD::BSWAP)
2430       Imm &= ~0x7U;
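    // For example, for i64 this yields a GREVI immediate of 63 for BITREVERSE
    // and 56 for BSWAP (31 and 24 respectively for i32).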
2431     return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2432                        DAG.getConstant(Imm, DL, VT));
2433   }
2434   case ISD::FSHL:
2435   case ISD::FSHR: {
2436     MVT VT = Op.getSimpleValueType();
2437     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2438     SDLoc DL(Op);
2439     if (Op.getOperand(2).getOpcode() == ISD::Constant)
2440       return Op;
2441     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2442     // use log2(XLen) bits. Mask the shift amount accordingly.
2443     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
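    // For example, on RV64 ShAmtWidth is 63, so the variable shift amount is
    // reduced to its low 6 bits, keeping the extra FSL/FSR shift bit clear.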
2444     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2445                                 DAG.getConstant(ShAmtWidth, DL, VT));
2446     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
2447     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
2448   }
2449   case ISD::TRUNCATE: {
2450     SDLoc DL(Op);
2451     MVT VT = Op.getSimpleValueType();
2452     // Only custom-lower vector truncates
2453     if (!VT.isVector())
2454       return Op;
2455 
2456     // Truncates to mask types are handled differently
2457     if (VT.getVectorElementType() == MVT::i1)
2458       return lowerVectorMaskTrunc(Op, DAG);
2459 
2460     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2461     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2462     // truncate by one power of two at a time.
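    // For example, a v4i64 -> v4i8 truncate is emitted as three such nodes:
    // i64 -> i32 -> i16 -> i8.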
2463     MVT DstEltVT = VT.getVectorElementType();
2464 
2465     SDValue Src = Op.getOperand(0);
2466     MVT SrcVT = Src.getSimpleValueType();
2467     MVT SrcEltVT = SrcVT.getVectorElementType();
2468 
2469     assert(DstEltVT.bitsLT(SrcEltVT) &&
2470            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2471            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2472            "Unexpected vector truncate lowering");
2473 
2474     MVT ContainerVT = SrcVT;
2475     if (SrcVT.isFixedLengthVector()) {
2476       ContainerVT = getContainerForFixedLengthVector(SrcVT);
2477       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2478     }
2479 
2480     SDValue Result = Src;
2481     SDValue Mask, VL;
2482     std::tie(Mask, VL) =
2483         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2484     LLVMContext &Context = *DAG.getContext();
2485     const ElementCount Count = ContainerVT.getVectorElementCount();
2486     do {
2487       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2488       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2489       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2490                            Mask, VL);
2491     } while (SrcEltVT != DstEltVT);
2492 
2493     if (SrcVT.isFixedLengthVector())
2494       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2495 
2496     return Result;
2497   }
2498   case ISD::ANY_EXTEND:
2499   case ISD::ZERO_EXTEND:
2500     if (Op.getOperand(0).getValueType().isVector() &&
2501         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2502       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2503     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2504   case ISD::SIGN_EXTEND:
2505     if (Op.getOperand(0).getValueType().isVector() &&
2506         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2507       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2508     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2509   case ISD::SPLAT_VECTOR_PARTS:
2510     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2511   case ISD::INSERT_VECTOR_ELT:
2512     return lowerINSERT_VECTOR_ELT(Op, DAG);
2513   case ISD::EXTRACT_VECTOR_ELT:
2514     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2515   case ISD::VSCALE: {
2516     MVT VT = Op.getSimpleValueType();
2517     SDLoc DL(Op);
2518     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64-bit known
    // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we
    // calculate vscale as VLENB / 8.
2522     assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2523     if (isa<ConstantSDNode>(Op.getOperand(0))) {
2524       // We assume VLENB is a multiple of 8. We manually choose the best shift
2525       // here because SimplifyDemandedBits isn't always able to simplify it.
2526       uint64_t Val = Op.getConstantOperandVal(0);
2527       if (isPowerOf2_64(Val)) {
2528         uint64_t Log2 = Log2_64(Val);
2529         if (Log2 < 3)
2530           return DAG.getNode(ISD::SRL, DL, VT, VLENB,
2531                              DAG.getConstant(3 - Log2, DL, VT));
2532         if (Log2 > 3)
2533           return DAG.getNode(ISD::SHL, DL, VT, VLENB,
2534                              DAG.getConstant(Log2 - 3, DL, VT));
2535         return VLENB;
2536       }
2537       // If the multiplier is a multiple of 8, scale it down to avoid needing
2538       // to shift the VLENB value.
2539       if ((Val % 8) == 0)
2540         return DAG.getNode(ISD::MUL, DL, VT, VLENB,
2541                            DAG.getConstant(Val / 8, DL, VT));
2542     }
2543 
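    // Otherwise, compute vscale as VLENB >> 3 and multiply it by the
    // requested amount.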
2544     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2545                                  DAG.getConstant(3, DL, VT));
2546     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2547   }
2548   case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size of the source. We
2550     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2551     // via f32.
2552     SDLoc DL(Op);
2553     MVT VT = Op.getSimpleValueType();
2554     SDValue Src = Op.getOperand(0);
2555     MVT SrcVT = Src.getSimpleValueType();
2556 
2557     // Prepare any fixed-length vector operands.
2558     MVT ContainerVT = VT;
2559     if (SrcVT.isFixedLengthVector()) {
2560       ContainerVT = getContainerForFixedLengthVector(VT);
2561       MVT SrcContainerVT =
2562           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2563       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2564     }
2565 
2566     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2567         SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the vXf16->vXf64 gap.
2570       if (!VT.isFixedLengthVector())
2571         return Op;
2572       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2573       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2574       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2575     }
2576 
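    // f16->f64 takes two hops: extend to f32 first, then from f32 to f64.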
2577     MVT InterVT = VT.changeVectorElementType(MVT::f32);
2578     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2579     SDValue IntermediateExtend = getRVVFPExtendOrRound(
2580         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2581 
2582     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2583                                            DL, DAG, Subtarget);
2584     if (VT.isFixedLengthVector())
2585       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2586     return Extend;
2587   }
2588   case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size of the source. We
2590     // custom-lower f64->f16 rounds via RVV's round-to-odd float
2591     // conversion instruction.
2592     SDLoc DL(Op);
2593     MVT VT = Op.getSimpleValueType();
2594     SDValue Src = Op.getOperand(0);
2595     MVT SrcVT = Src.getSimpleValueType();
2596 
2597     // Prepare any fixed-length vector operands.
2598     MVT ContainerVT = VT;
2599     if (VT.isFixedLengthVector()) {
2600       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2601       ContainerVT =
2602           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2603       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2604     }
2605 
2606     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2607         SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the vXf64->vXf16 gap.
2610       if (!VT.isFixedLengthVector())
2611         return Op;
2612       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2613       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2614       return convertFromScalableVector(VT, Src, DAG, Subtarget);
2615     }
2616 
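    // f64->f16 also takes two hops: narrow to f32 with the round-to-odd
    // conversion (which avoids double-rounding issues), then round from f32
    // to f16.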
2617     SDValue Mask, VL;
2618     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2619 
2620     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2621     SDValue IntermediateRound =
2622         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2623     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2624                                           DL, DAG, Subtarget);
2625 
2626     if (VT.isFixedLengthVector())
2627       return convertFromScalableVector(VT, Round, DAG, Subtarget);
2628     return Round;
2629   }
2630   case ISD::FP_TO_SINT:
2631   case ISD::FP_TO_UINT:
2632   case ISD::SINT_TO_FP:
2633   case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size of
    // the source. We custom-lower any conversion that would otherwise take
    // two hops into a sequence of operations.
2637     MVT VT = Op.getSimpleValueType();
2638     if (!VT.isVector())
2639       return Op;
2640     SDLoc DL(Op);
2641     SDValue Src = Op.getOperand(0);
2642     MVT EltVT = VT.getVectorElementType();
2643     MVT SrcVT = Src.getSimpleValueType();
2644     MVT SrcEltVT = SrcVT.getVectorElementType();
2645     unsigned EltSize = EltVT.getSizeInBits();
2646     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2647     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2648            "Unexpected vector element types");
2649 
2650     bool IsInt2FP = SrcEltVT.isInteger();
2651     // Widening conversions
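    // (Ratios of at least 4 need two hops; e.g. vXi8->vXf32 extends to vXi32
    // first, and vXf16->vXi64 goes via an f32 fp_extend.)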
2652     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2653       if (IsInt2FP) {
2654         // Do a regular integer sign/zero extension then convert to float.
2655         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
2656                                       VT.getVectorElementCount());
2657         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2658                                  ? ISD::ZERO_EXTEND
2659                                  : ISD::SIGN_EXTEND;
2660         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2661         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2662       }
2663       // FP2Int
2664       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2665       // Do one doubling fp_extend then complete the operation by converting
2666       // to int.
2667       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2668       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2669       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2670     }
2671 
2672     // Narrowing conversions
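    // (Again, only ratios of at least 4 need help; e.g. vXi64->vXf16 goes via
    // an f32 int_to_fp, and vXf64->vXi8 converts to vXi32 before truncating.)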
2673     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2674       if (IsInt2FP) {
2675         // One narrowing int_to_fp, then an fp_round.
2676         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2677         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2678         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2679         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2680       }
2681       // FP2Int
2682       // One narrowing fp_to_int, then truncate the integer. If the float isn't
2683       // representable by the integer, the result is poison.
2684       MVT IVecVT =
2685           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
2686                            VT.getVectorElementCount());
2687       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2688       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2689     }
2690 
    // Scalable vectors can exit here: patterns will handle equally-sized
    // conversions as well as those that halve or double the element size.
2693     if (!VT.isFixedLengthVector())
2694       return Op;
2695 
2696     // For fixed-length vectors we lower to a custom "VL" node.
2697     unsigned RVVOpc = 0;
2698     switch (Op.getOpcode()) {
2699     default:
2700       llvm_unreachable("Impossible opcode");
2701     case ISD::FP_TO_SINT:
2702       RVVOpc = RISCVISD::FP_TO_SINT_VL;
2703       break;
2704     case ISD::FP_TO_UINT:
2705       RVVOpc = RISCVISD::FP_TO_UINT_VL;
2706       break;
2707     case ISD::SINT_TO_FP:
2708       RVVOpc = RISCVISD::SINT_TO_FP_VL;
2709       break;
2710     case ISD::UINT_TO_FP:
2711       RVVOpc = RISCVISD::UINT_TO_FP_VL;
2712       break;
2713     }
2714 
2715     MVT ContainerVT, SrcContainerVT;
2716     // Derive the reference container type from the larger vector type.
2717     if (SrcEltSize > EltSize) {
2718       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2719       ContainerVT =
2720           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2721     } else {
2722       ContainerVT = getContainerForFixedLengthVector(VT);
2723       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2724     }
2725 
2726     SDValue Mask, VL;
2727     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2728 
2729     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2730     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2731     return convertFromScalableVector(VT, Src, DAG, Subtarget);
2732   }
2733   case ISD::FP_TO_SINT_SAT:
2734   case ISD::FP_TO_UINT_SAT:
2735     return lowerFP_TO_INT_SAT(Op, DAG);
2736   case ISD::VECREDUCE_ADD:
2737   case ISD::VECREDUCE_UMAX:
2738   case ISD::VECREDUCE_SMAX:
2739   case ISD::VECREDUCE_UMIN:
2740   case ISD::VECREDUCE_SMIN:
2741     return lowerVECREDUCE(Op, DAG);
2742   case ISD::VECREDUCE_AND:
2743   case ISD::VECREDUCE_OR:
2744   case ISD::VECREDUCE_XOR:
2745     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2746       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
2747     return lowerVECREDUCE(Op, DAG);
2748   case ISD::VECREDUCE_FADD:
2749   case ISD::VECREDUCE_SEQ_FADD:
2750   case ISD::VECREDUCE_FMIN:
2751   case ISD::VECREDUCE_FMAX:
2752     return lowerFPVECREDUCE(Op, DAG);
2753   case ISD::VP_REDUCE_ADD:
2754   case ISD::VP_REDUCE_UMAX:
2755   case ISD::VP_REDUCE_SMAX:
2756   case ISD::VP_REDUCE_UMIN:
2757   case ISD::VP_REDUCE_SMIN:
2758   case ISD::VP_REDUCE_FADD:
2759   case ISD::VP_REDUCE_SEQ_FADD:
2760   case ISD::VP_REDUCE_FMIN:
2761   case ISD::VP_REDUCE_FMAX:
2762     return lowerVPREDUCE(Op, DAG);
2763   case ISD::VP_REDUCE_AND:
2764   case ISD::VP_REDUCE_OR:
2765   case ISD::VP_REDUCE_XOR:
2766     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
2767       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
2768     return lowerVPREDUCE(Op, DAG);
2769   case ISD::INSERT_SUBVECTOR:
2770     return lowerINSERT_SUBVECTOR(Op, DAG);
2771   case ISD::EXTRACT_SUBVECTOR:
2772     return lowerEXTRACT_SUBVECTOR(Op, DAG);
2773   case ISD::STEP_VECTOR:
2774     return lowerSTEP_VECTOR(Op, DAG);
2775   case ISD::VECTOR_REVERSE:
2776     return lowerVECTOR_REVERSE(Op, DAG);
2777   case ISD::BUILD_VECTOR:
2778     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2779   case ISD::SPLAT_VECTOR:
2780     if (Op.getValueType().getVectorElementType() == MVT::i1)
2781       return lowerVectorMaskSplat(Op, DAG);
2782     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2783   case ISD::VECTOR_SHUFFLE:
2784     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2785   case ISD::CONCAT_VECTORS: {
2786     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2787     // better than going through the stack, as the default expansion does.
2788     SDLoc DL(Op);
2789     MVT VT = Op.getSimpleValueType();
2790     unsigned NumOpElts =
2791         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
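    // For example, concatenating two v4i32 operands inserts them into an
    // undef result at element indices 0 and 4.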
2792     SDValue Vec = DAG.getUNDEF(VT);
2793     for (const auto &OpIdx : enumerate(Op->ops()))
2794       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2795                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2796     return Vec;
2797   }
2798   case ISD::LOAD:
2799     if (auto V = expandUnalignedRVVLoad(Op, DAG))
2800       return V;
2801     if (Op.getValueType().isFixedLengthVector())
2802       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2803     return Op;
2804   case ISD::STORE:
2805     if (auto V = expandUnalignedRVVStore(Op, DAG))
2806       return V;
2807     if (Op.getOperand(1).getValueType().isFixedLengthVector())
2808       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2809     return Op;
2810   case ISD::MLOAD:
2811   case ISD::VP_LOAD:
2812     return lowerMaskedLoad(Op, DAG);
2813   case ISD::MSTORE:
2814   case ISD::VP_STORE:
2815     return lowerMaskedStore(Op, DAG);
2816   case ISD::SETCC:
2817     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2818   case ISD::ADD:
2819     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2820   case ISD::SUB:
2821     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2822   case ISD::MUL:
2823     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2824   case ISD::MULHS:
2825     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2826   case ISD::MULHU:
2827     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2828   case ISD::AND:
2829     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2830                                               RISCVISD::AND_VL);
2831   case ISD::OR:
2832     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2833                                               RISCVISD::OR_VL);
2834   case ISD::XOR:
2835     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2836                                               RISCVISD::XOR_VL);
2837   case ISD::SDIV:
2838     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2839   case ISD::SREM:
2840     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2841   case ISD::UDIV:
2842     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2843   case ISD::UREM:
2844     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2845   case ISD::SHL:
2846   case ISD::SRA:
2847   case ISD::SRL:
2848     if (Op.getSimpleValueType().isFixedLengthVector())
2849       return lowerFixedLengthVectorShiftToRVV(Op, DAG);
2850     // This can be called for an i32 shift amount that needs to be promoted.
2851     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
2852            "Unexpected custom legalisation");
2853     return SDValue();
2854   case ISD::SADDSAT:
2855     return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
2856   case ISD::UADDSAT:
2857     return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
2858   case ISD::SSUBSAT:
2859     return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
2860   case ISD::USUBSAT:
2861     return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
2862   case ISD::FADD:
2863     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2864   case ISD::FSUB:
2865     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2866   case ISD::FMUL:
2867     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2868   case ISD::FDIV:
2869     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2870   case ISD::FNEG:
2871     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2872   case ISD::FABS:
2873     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2874   case ISD::FSQRT:
2875     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2876   case ISD::FMA:
2877     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2878   case ISD::SMIN:
2879     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2880   case ISD::SMAX:
2881     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2882   case ISD::UMIN:
2883     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2884   case ISD::UMAX:
2885     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2886   case ISD::FMINNUM:
2887     return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2888   case ISD::FMAXNUM:
2889     return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2890   case ISD::ABS:
2891     return lowerABS(Op, DAG);
2892   case ISD::VSELECT:
2893     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2894   case ISD::FCOPYSIGN:
2895     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2896   case ISD::MGATHER:
2897   case ISD::VP_GATHER:
2898     return lowerMaskedGather(Op, DAG);
2899   case ISD::MSCATTER:
2900   case ISD::VP_SCATTER:
2901     return lowerMaskedScatter(Op, DAG);
2902   case ISD::FLT_ROUNDS_:
2903     return lowerGET_ROUNDING(Op, DAG);
2904   case ISD::SET_ROUNDING:
2905     return lowerSET_ROUNDING(Op, DAG);
2906   case ISD::VP_ADD:
2907     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2908   case ISD::VP_SUB:
2909     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2910   case ISD::VP_MUL:
2911     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2912   case ISD::VP_SDIV:
2913     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2914   case ISD::VP_UDIV:
2915     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2916   case ISD::VP_SREM:
2917     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2918   case ISD::VP_UREM:
2919     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2920   case ISD::VP_AND:
2921     return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2922   case ISD::VP_OR:
2923     return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2924   case ISD::VP_XOR:
2925     return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2926   case ISD::VP_ASHR:
2927     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2928   case ISD::VP_LSHR:
2929     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2930   case ISD::VP_SHL:
2931     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2932   case ISD::VP_FADD:
2933     return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
2934   case ISD::VP_FSUB:
2935     return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
2936   case ISD::VP_FMUL:
2937     return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
2938   case ISD::VP_FDIV:
2939     return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
2940   }
2941 }
2942 
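// Helpers for the templated getAddr below: each overload materialises the
// target-specific address node (global address, block address, constant pool
// or jump table) with the requested operand flags.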
2943 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2944                              SelectionDAG &DAG, unsigned Flags) {
2945   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2946 }
2947 
2948 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2949                              SelectionDAG &DAG, unsigned Flags) {
2950   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2951                                    Flags);
2952 }
2953 
2954 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2955                              SelectionDAG &DAG, unsigned Flags) {
2956   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2957                                    N->getOffset(), Flags);
2958 }
2959 
2960 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2961                              SelectionDAG &DAG, unsigned Flags) {
2962   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2963 }
2964 
2965 template <class NodeTy>
2966 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2967                                      bool IsLocal) const {
2968   SDLoc DL(N);
2969   EVT Ty = getPointerTy(DAG.getDataLayout());
2970 
2971   if (isPositionIndependent()) {
2972     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2973     if (IsLocal)
2974       // Use PC-relative addressing to access the symbol. This generates the
2975       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2976       // %pcrel_lo(auipc)).
2977       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2978 
2979     // Use PC-relative addressing to access the GOT for this symbol, then load
2980     // the address from the GOT. This generates the pattern (PseudoLA sym),
2981     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2982     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2983   }
2984 
2985   switch (getTargetMachine().getCodeModel()) {
2986   default:
2987     report_fatal_error("Unsupported code model for lowering");
2988   case CodeModel::Small: {
2989     // Generate a sequence for accessing addresses within the first 2 GiB of
2990     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
2991     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2992     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2993     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2994     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2995   }
2996   case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2 GiB range of
    // the address space. This generates the pattern (PseudoLLA sym), which
2999     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3000     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3001     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3002   }
3003   }
3004 }
3005 
3006 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3007                                                 SelectionDAG &DAG) const {
3008   SDLoc DL(Op);
3009   EVT Ty = Op.getValueType();
3010   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3011   int64_t Offset = N->getOffset();
3012   MVT XLenVT = Subtarget.getXLenVT();
3013 
3014   const GlobalValue *GV = N->getGlobal();
3015   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3016   SDValue Addr = getAddr(N, DAG, IsLocal);
3017 
3018   // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it into the global address node. Later peephole optimisations may choose
  // to fold it back in when profitable.
3022   if (Offset != 0)
3023     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3024                        DAG.getConstant(Offset, DL, XLenVT));
3025   return Addr;
3026 }
3027 
3028 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3029                                                SelectionDAG &DAG) const {
3030   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3031 
3032   return getAddr(N, DAG);
3033 }
3034 
3035 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3036                                                SelectionDAG &DAG) const {
3037   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3038 
3039   return getAddr(N, DAG);
3040 }
3041 
3042 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3043                                             SelectionDAG &DAG) const {
3044   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3045 
3046   return getAddr(N, DAG);
3047 }
3048 
3049 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3050                                               SelectionDAG &DAG,
3051                                               bool UseGOT) const {
3052   SDLoc DL(N);
3053   EVT Ty = getPointerTy(DAG.getDataLayout());
3054   const GlobalValue *GV = N->getGlobal();
3055   MVT XLenVT = Subtarget.getXLenVT();
3056 
3057   if (UseGOT) {
3058     // Use PC-relative addressing to access the GOT for this TLS symbol, then
3059     // load the address from the GOT and add the thread pointer. This generates
3060     // the pattern (PseudoLA_TLS_IE sym), which expands to
3061     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3062     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3063     SDValue Load =
3064         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3065 
3066     // Add the thread pointer.
3067     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3068     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3069   }
3070 
3071   // Generate a sequence for accessing the address relative to the thread
3072   // pointer, with the appropriate adjustment for the thread pointer offset.
3073   // This generates the pattern
3074   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3075   SDValue AddrHi =
3076       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3077   SDValue AddrAdd =
3078       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3079   SDValue AddrLo =
3080       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3081 
3082   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3083   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3084   SDValue MNAdd = SDValue(
3085       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3086       0);
3087   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3088 }
3089 
3090 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3091                                                SelectionDAG &DAG) const {
3092   SDLoc DL(N);
3093   EVT Ty = getPointerTy(DAG.getDataLayout());
3094   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3095   const GlobalValue *GV = N->getGlobal();
3096 
3097   // Use a PC-relative addressing mode to access the global dynamic GOT address.
3098   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3099   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3100   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3101   SDValue Load =
3102       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3103 
3104   // Prepare argument list to generate call.
3105   ArgListTy Args;
3106   ArgListEntry Entry;
3107   Entry.Node = Load;
3108   Entry.Ty = CallTy;
3109   Args.push_back(Entry);
3110 
3111   // Setup call to __tls_get_addr.
3112   TargetLowering::CallLoweringInfo CLI(DAG);
3113   CLI.setDebugLoc(DL)
3114       .setChain(DAG.getEntryNode())
3115       .setLibCallee(CallingConv::C, CallTy,
3116                     DAG.getExternalSymbol("__tls_get_addr", Ty),
3117                     std::move(Args));
3118 
3119   return LowerCallTo(CLI).first;
3120 }
3121 
3122 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3123                                                    SelectionDAG &DAG) const {
3124   SDLoc DL(Op);
3125   EVT Ty = Op.getValueType();
3126   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3127   int64_t Offset = N->getOffset();
3128   MVT XLenVT = Subtarget.getXLenVT();
3129 
3130   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3131 
3132   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3133       CallingConv::GHC)
3134     report_fatal_error("In GHC calling convention TLS is not supported");
3135 
3136   SDValue Addr;
3137   switch (Model) {
3138   case TLSModel::LocalExec:
3139     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3140     break;
3141   case TLSModel::InitialExec:
3142     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3143     break;
3144   case TLSModel::LocalDynamic:
3145   case TLSModel::GeneralDynamic:
3146     Addr = getDynamicTLSAddr(N, DAG);
3147     break;
3148   }
3149 
3150   // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it into the global address node. Later peephole optimisations may choose
  // to fold it back in when profitable.
3154   if (Offset != 0)
3155     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3156                        DAG.getConstant(Offset, DL, XLenVT));
3157   return Addr;
3158 }
3159 
3160 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3161   SDValue CondV = Op.getOperand(0);
3162   SDValue TrueV = Op.getOperand(1);
3163   SDValue FalseV = Op.getOperand(2);
3164   SDLoc DL(Op);
3165   MVT VT = Op.getSimpleValueType();
3166   MVT XLenVT = Subtarget.getXLenVT();
3167 
3168   // Lower vector SELECTs to VSELECTs by splatting the condition.
3169   if (VT.isVector()) {
3170     MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3171     SDValue CondSplat = VT.isScalableVector()
3172                             ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3173                             : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3174     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3175   }
3176 
3177   // If the result type is XLenVT and CondV is the output of a SETCC node
3178   // which also operated on XLenVT inputs, then merge the SETCC node into the
3179   // lowered RISCVISD::SELECT_CC to take advantage of the integer
3180   // compare+branch instructions. i.e.:
3181   // (select (setcc lhs, rhs, cc), truev, falsev)
3182   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3183   if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3184       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3185     SDValue LHS = CondV.getOperand(0);
3186     SDValue RHS = CondV.getOperand(1);
3187     const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3188     ISD::CondCode CCVal = CC->get();
3189 
    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restrict to the SETLT
    // case for now because that is what signed saturating add/sub need.
3194     // FIXME: We don't need the condition to be SETLT or even a SETCC,
3195     // but we would probably want to swap the true/false values if the condition
3196     // is SETGE/SETLE to avoid an XORI.
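    // For example, since the SETLT result is 0 or 1, (select cc, C, C-1)
    // becomes (add cc, C-1) and (select cc, C, C+1) becomes (sub C+1, cc).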
3197     if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3198         CCVal == ISD::SETLT) {
3199       const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3200       const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3201       if (TrueVal - 1 == FalseVal)
3202         return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3203       if (TrueVal + 1 == FalseVal)
3204         return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3205     }
3206 
3207     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3208 
3209     SDValue TargetCC = DAG.getCondCode(CCVal);
3210     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3211     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3212   }
3213 
3214   // Otherwise:
3215   // (select condv, truev, falsev)
3216   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3217   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3218   SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3219 
3220   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3221 
3222   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3223 }
3224 
3225 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3226   SDValue CondV = Op.getOperand(1);
3227   SDLoc DL(Op);
3228   MVT XLenVT = Subtarget.getXLenVT();
3229 
3230   if (CondV.getOpcode() == ISD::SETCC &&
3231       CondV.getOperand(0).getValueType() == XLenVT) {
3232     SDValue LHS = CondV.getOperand(0);
3233     SDValue RHS = CondV.getOperand(1);
3234     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3235 
3236     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3237 
3238     SDValue TargetCC = DAG.getCondCode(CCVal);
3239     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3240                        LHS, RHS, TargetCC, Op.getOperand(2));
3241   }
3242 
3243   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3244                      CondV, DAG.getConstant(0, DL, XLenVT),
3245                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3246 }
3247 
3248 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3249   MachineFunction &MF = DAG.getMachineFunction();
3250   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3251 
3252   SDLoc DL(Op);
3253   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3254                                  getPointerTy(MF.getDataLayout()));
3255 
3256   // vastart just stores the address of the VarArgsFrameIndex slot into the
3257   // memory location argument.
3258   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3259   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3260                       MachinePointerInfo(SV));
3261 }
3262 
3263 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3264                                             SelectionDAG &DAG) const {
3265   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3266   MachineFunction &MF = DAG.getMachineFunction();
3267   MachineFrameInfo &MFI = MF.getFrameInfo();
3268   MFI.setFrameAddressIsTaken(true);
3269   Register FrameReg = RI.getFrameRegister(MF);
3270   int XLenInBytes = Subtarget.getXLen() / 8;
3271 
3272   EVT VT = Op.getValueType();
3273   SDLoc DL(Op);
3274   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3275   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
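  // Walk up the frame chain: the caller's frame pointer is saved at an offset
  // of -2*XLenInBytes from the current frame pointer.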
3276   while (Depth--) {
3277     int Offset = -(XLenInBytes * 2);
3278     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3279                               DAG.getIntPtrConstant(Offset, DL));
3280     FrameAddr =
3281         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3282   }
3283   return FrameAddr;
3284 }
3285 
3286 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3287                                              SelectionDAG &DAG) const {
3288   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3289   MachineFunction &MF = DAG.getMachineFunction();
3290   MachineFrameInfo &MFI = MF.getFrameInfo();
3291   MFI.setReturnAddressIsTaken(true);
3292   MVT XLenVT = Subtarget.getXLenVT();
3293   int XLenInBytes = Subtarget.getXLen() / 8;
3294 
3295   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3296     return SDValue();
3297 
3298   EVT VT = Op.getValueType();
3299   SDLoc DL(Op);
3300   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3301   if (Depth) {
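    // For a non-zero depth, walk up to the requested frame via lowerFRAMEADDR
    // and load the return address saved XLenInBytes below that frame pointer.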
3302     int Off = -XLenInBytes;
3303     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3304     SDValue Offset = DAG.getConstant(Off, DL, VT);
3305     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3306                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3307                        MachinePointerInfo());
3308   }
3309 
3310   // Return the value of the return address register, marking it an implicit
3311   // live-in.
3312   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3313   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3314 }
3315 
3316 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3317                                                  SelectionDAG &DAG) const {
3318   SDLoc DL(Op);
3319   SDValue Lo = Op.getOperand(0);
3320   SDValue Hi = Op.getOperand(1);
3321   SDValue Shamt = Op.getOperand(2);
3322   EVT VT = Lo.getValueType();
3323 
3324   // if Shamt-XLEN < 0: // Shamt < XLEN
3325   //   Lo = Lo << Shamt
3326   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3327   // else:
  //   Hi = Lo << (Shamt-XLEN)
  //   Lo = 0
3330 
3331   SDValue Zero = DAG.getConstant(0, DL, VT);
3332   SDValue One = DAG.getConstant(1, DL, VT);
3333   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3334   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3335   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3336   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3337 
3338   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3339   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3340   SDValue ShiftRightLo =
3341       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3342   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3343   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3344   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3345 
3346   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3347 
3348   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3349   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3350 
3351   SDValue Parts[2] = {Lo, Hi};
3352   return DAG.getMergeValues(Parts, DL);
3353 }
3354 
3355 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3356                                                   bool IsSRA) const {
3357   SDLoc DL(Op);
3358   SDValue Lo = Op.getOperand(0);
3359   SDValue Hi = Op.getOperand(1);
3360   SDValue Shamt = Op.getOperand(2);
3361   EVT VT = Lo.getValueType();
3362 
3363   // SRA expansion:
3364   //   if Shamt-XLEN < 0: // Shamt < XLEN
3365   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3366   //     Hi = Hi >>s Shamt
3367   //   else:
  //     Lo = Hi >>s (Shamt-XLEN)
3369   //     Hi = Hi >>s (XLEN-1)
3370   //
3371   // SRL expansion:
3372   //   if Shamt-XLEN < 0: // Shamt < XLEN
3373   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3374   //     Hi = Hi >>u Shamt
3375   //   else:
  //     Lo = Hi >>u (Shamt-XLEN)
  //     Hi = 0
3378 
3379   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3380 
3381   SDValue Zero = DAG.getConstant(0, DL, VT);
3382   SDValue One = DAG.getConstant(1, DL, VT);
3383   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3384   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3385   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3386   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3387 
3388   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3389   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3390   SDValue ShiftLeftHi =
3391       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3392   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3393   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3394   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3395   SDValue HiFalse =
3396       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3397 
3398   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3399 
3400   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3401   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3402 
3403   SDValue Parts[2] = {Lo, Hi};
3404   return DAG.getMergeValues(Parts, DL);
3405 }
3406 
3407 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3408 // legal equivalently-sized i8 type, so we can use that as a go-between.
3409 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3410                                                   SelectionDAG &DAG) const {
3411   SDLoc DL(Op);
3412   MVT VT = Op.getSimpleValueType();
3413   SDValue SplatVal = Op.getOperand(0);
3414   // All-zeros or all-ones splats are handled specially.
3415   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3416     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3417     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3418   }
3419   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3420     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3421     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3422   }
3423   MVT XLenVT = Subtarget.getXLenVT();
3424   assert(SplatVal.getValueType() == XLenVT &&
3425          "Unexpected type for i1 splat value");
3426   MVT InterVT = VT.changeVectorElementType(MVT::i8);
3427   SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3428                          DAG.getConstant(1, DL, XLenVT));
3429   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3430   SDValue Zero = DAG.getConstant(0, DL, InterVT);
3431   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3432 }
3433 
3434 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3435 // illegal (currently only vXi64 RV32).
3436 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3437 // them to SPLAT_VECTOR_I64
3438 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3439                                                      SelectionDAG &DAG) const {
3440   SDLoc DL(Op);
3441   MVT VecVT = Op.getSimpleValueType();
3442   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3443          "Unexpected SPLAT_VECTOR_PARTS lowering");
3444 
3445   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3446   SDValue Lo = Op.getOperand(0);
3447   SDValue Hi = Op.getOperand(1);
3448 
3449   if (VecVT.isFixedLengthVector()) {
3450     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3451     SDLoc DL(Op);
3452     SDValue Mask, VL;
3453     std::tie(Mask, VL) =
3454         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3455 
3456     SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3457     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3458   }
3459 
3460   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3461     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3462     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3463     // If Hi constant is all the same sign bit as Lo, lower this as a custom
3464     // node in order to try and match RVV vector/scalar instructions.
3465     if ((LoC >> 31) == HiC)
3466       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3467   }
3468 
3469   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3470   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3471       isa<ConstantSDNode>(Hi.getOperand(1)) &&
3472       Hi.getConstantOperandVal(1) == 31)
3473     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3474 
  // Fall back to using a stack store and a stride-x0 vector load. Use X0 as
  // the VL.
3476   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3477                      DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
3478 }
3479 
3480 // Custom-lower extensions from mask vectors by using a vselect either with 1
3481 // for zero/any-extension or -1 for sign-extension:
3482 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
3483 // Note that any-extension is lowered identically to zero-extension.
3484 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
3485                                                 int64_t ExtTrueVal) const {
3486   SDLoc DL(Op);
3487   MVT VecVT = Op.getSimpleValueType();
3488   SDValue Src = Op.getOperand(0);
3489   // Only custom-lower extensions from mask types
3490   assert(Src.getValueType().isVector() &&
3491          Src.getValueType().getVectorElementType() == MVT::i1);
3492 
3493   MVT XLenVT = Subtarget.getXLenVT();
3494   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
3495   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
3496 
3497   if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants: on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
3502     bool IsRV32E64 =
3503         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
3504 
3505     if (!IsRV32E64) {
3506       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
3507       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
3508     } else {
3509       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
3510       SplatTrueVal =
3511           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
3512     }
3513 
3514     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
3515   }
3516 
3517   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3518   MVT I1ContainerVT =
3519       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3520 
3521   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
3522 
3523   SDValue Mask, VL;
3524   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3525 
3526   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
3527   SplatTrueVal =
3528       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
3529   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
3530                                SplatTrueVal, SplatZero, VL);
3531 
3532   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
3533 }
3534 
3535 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3536     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3537   MVT ExtVT = Op.getSimpleValueType();
3538   // Only custom-lower extensions from fixed-length vector types.
3539   if (!ExtVT.isFixedLengthVector())
3540     return Op;
3541   MVT VT = Op.getOperand(0).getSimpleValueType();
3542   // Grab the canonical container type for the extended type. Infer the smaller
3543   // type from that to ensure the same number of vector elements, as we know
3544   // the LMUL will be sufficient to hold the smaller type.
3545   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Build the container type for the narrower source type manually to ensure
  // the same number of vector elements between source and dest.
3548   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3549                                      ContainerExtVT.getVectorElementCount());
3550 
3551   SDValue Op1 =
3552       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3553 
3554   SDLoc DL(Op);
3555   SDValue Mask, VL;
3556   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3557 
3558   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3559 
3560   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3561 }
3562 
3563 // Custom-lower truncations from vectors to mask vectors by using a mask and a
3564 // setcc operation:
3565 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
3566 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
3567                                                   SelectionDAG &DAG) const {
3568   SDLoc DL(Op);
3569   EVT MaskVT = Op.getValueType();
3570   // Only expect to custom-lower truncations to mask types
3571   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
3572          "Unexpected type for vector mask lowering");
3573   SDValue Src = Op.getOperand(0);
3574   MVT VecVT = Src.getSimpleValueType();
3575 
3576   // If this is a fixed vector, we need to convert it to a scalable vector.
3577   MVT ContainerVT = VecVT;
3578   if (VecVT.isFixedLengthVector()) {
3579     ContainerVT = getContainerForFixedLengthVector(VecVT);
3580     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3581   }
3582 
3583   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
3584   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
3585 
3586   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
3587   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
3588 
3589   if (VecVT.isScalableVector()) {
3590     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
3591     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
3592   }
3593 
3594   SDValue Mask, VL;
3595   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3596 
3597   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3598   SDValue Trunc =
3599       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
3600   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
3601                       DAG.getCondCode(ISD::SETNE), Mask, VL);
3602   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
3603 }
3604 
3605 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
3606 // first position of a vector, and that vector is slid up to the insert index.
3607 // By limiting the active vector length to index+1 and merging with the
3608 // original vector (with an undisturbed tail policy for elements >= VL), we
3609 // achieve the desired result of leaving all elements untouched except the one
3610 // at VL-1, which is replaced with the desired value.
3611 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3612                                                     SelectionDAG &DAG) const {
3613   SDLoc DL(Op);
3614   MVT VecVT = Op.getSimpleValueType();
3615   SDValue Vec = Op.getOperand(0);
3616   SDValue Val = Op.getOperand(1);
3617   SDValue Idx = Op.getOperand(2);
3618 
3619   if (VecVT.getVectorElementType() == MVT::i1) {
3620     // FIXME: For now we just promote to an i8 vector and insert into that,
3621     // but this is probably not optimal.
3622     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
3623     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3624     Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
3625     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
3626   }
3627 
3628   MVT ContainerVT = VecVT;
3629   // If the operand is a fixed-length vector, convert to a scalable one.
3630   if (VecVT.isFixedLengthVector()) {
3631     ContainerVT = getContainerForFixedLengthVector(VecVT);
3632     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3633   }
3634 
3635   MVT XLenVT = Subtarget.getXLenVT();
3636 
3637   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3638   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the value is simply the sign-extension of its lower 32
  // bits, i.e. it fits in a sign-extended i32.
3642   // TODO: We could also catch sign extensions of a 32-bit value.
3643   if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
3644     const auto *CVal = cast<ConstantSDNode>(Val);
3645     if (isInt<32>(CVal->getSExtValue())) {
3646       IsLegalInsert = true;
3647       Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3648     }
3649   }
3650 
3651   SDValue Mask, VL;
3652   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3653 
3654   SDValue ValInVec;
3655 
3656   if (IsLegalInsert) {
3657     unsigned Opc =
3658         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
3659     if (isNullConstant(Idx)) {
3660       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
3661       if (!VecVT.isFixedLengthVector())
3662         return Vec;
3663       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
3664     }
3665     ValInVec =
3666         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
3667   } else {
3668     // On RV32, i64-element vectors must be specially handled to place the
3669     // value at element 0, by using two vslide1up instructions in sequence on
3670     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
3671     // this.
3672     SDValue One = DAG.getConstant(1, DL, XLenVT);
3673     SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
3674     SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
3675     MVT I32ContainerVT =
3676         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
3677     SDValue I32Mask =
3678         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
3679     // Limit the active VL to two.
3680     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
3682     // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
3683     ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
3684                            InsertI64VL);
3685     // First slide in the hi value, then the lo in underneath it.
3686     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3687                            ValHi, I32Mask, InsertI64VL);
3688     ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3689                            ValLo, I32Mask, InsertI64VL);
3690     // Bitcast back to the right container type.
3691     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
3692   }
3693 
3694   // Now that the value is in a vector, slide it into position.
3695   SDValue InsertVL =
3696       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
3697   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3698                                 ValInVec, Idx, Mask, InsertVL);
3699   if (!VecVT.isFixedLengthVector())
3700     return Slideup;
3701   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3702 }
3703 
3704 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
3705 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
3706 // types this is done using VMV_X_S to allow us to glean information about the
3707 // sign bits of the result.
3708 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3709                                                      SelectionDAG &DAG) const {
3710   SDLoc DL(Op);
3711   SDValue Idx = Op.getOperand(1);
3712   SDValue Vec = Op.getOperand(0);
3713   EVT EltVT = Op.getValueType();
3714   MVT VecVT = Vec.getSimpleValueType();
3715   MVT XLenVT = Subtarget.getXLenVT();
3716 
3717   if (VecVT.getVectorElementType() == MVT::i1) {
3718     // FIXME: For now we just promote to an i8 vector and extract from that,
3719     // but this is probably not optimal.
3720     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
3721     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3722     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
3723   }
3724 
3725   // If this is a fixed vector, we need to convert it to a scalable vector.
3726   MVT ContainerVT = VecVT;
3727   if (VecVT.isFixedLengthVector()) {
3728     ContainerVT = getContainerForFixedLengthVector(VecVT);
3729     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3730   }
3731 
3732   // If the index is 0, the vector is already in the right position.
3733   if (!isNullConstant(Idx)) {
3734     // Use a VL of 1 to avoid processing more elements than we need.
3735     SDValue VL = DAG.getConstant(1, DL, XLenVT);
3736     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3737     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3738     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3739                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3740   }
3741 
3742   if (!EltVT.isInteger()) {
3743     // Floating-point extracts are handled in TableGen.
3744     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
3745                        DAG.getConstant(0, DL, XLenVT));
3746   }
3747 
3748   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3749   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
3750 }
3751 
3752 // Some RVV intrinsics may claim that they want an integer operand to be
3753 // promoted or expanded.
3754 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
3755                                           const RISCVSubtarget &Subtarget) {
3756   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3757           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3758          "Unexpected opcode");
3759 
3760   if (!Subtarget.hasStdExtV())
3761     return SDValue();
3762 
3763   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3764   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
3765   SDLoc DL(Op);
3766 
3767   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
3768       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3769   if (!II || !II->SplatOperand)
3770     return SDValue();
3771 
3772   unsigned SplatOp = II->SplatOperand + HasChain;
3773   assert(SplatOp < Op.getNumOperands());
3774 
3775   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
3776   SDValue &ScalarOp = Operands[SplatOp];
3777   MVT OpVT = ScalarOp.getSimpleValueType();
3778   MVT XLenVT = Subtarget.getXLenVT();
3779 
  // If this isn't a scalar, or its type is already XLenVT, we're done.
3781   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
3782     return SDValue();
3783 
3784   // Simplest case is that the operand needs to be promoted to XLenVT.
3785   if (OpVT.bitsLT(XLenVT)) {
3786     // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
3789     // FIXME: Should we ignore the upper bits in isel instead?
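    // For example, an i8 constant -1 sign-extends to an XLenVT -1, which still
    // satisfies the simm5 check and can select a .vi form (illustrative).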
3790     unsigned ExtOpc =
3791         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3792     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
3793     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3794   }
3795 
3796   // Use the previous operand to get the vXi64 VT. The result might be a mask
3797   // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than the scalar operand and
3799   // that a widening operation never uses SEW=64.
  // NOTE: If the assert below fails, we can probably just find the element
  // count from any operand or result and use it to construct the VT.
3802   assert(II->SplatOperand > 1 && "Unexpected splat operand!");
3803   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
3804 
3805   // The more complex case is when the scalar is larger than XLenVT.
3806   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
3807          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
3808 
3809   // If this is a sign-extended 32-bit constant, we can truncate it and rely
3810   // on the instruction to sign-extend since SEW>XLEN.
3811   if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
3812     if (isInt<32>(CVal->getSExtValue())) {
3813       ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3814       return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3815     }
3816   }
3817 
3818   // We need to convert the scalar to a splat vector.
3819   // FIXME: Can we implicitly truncate the scalar if it is known to
3820   // be sign extended?
3821   // VL should be the last operand.
3822   SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
3823   assert(VL.getValueType() == XLenVT);
3824   ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
3825   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3826 }
3827 
3828 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3829                                                      SelectionDAG &DAG) const {
3830   unsigned IntNo = Op.getConstantOperandVal(0);
3831   SDLoc DL(Op);
3832   MVT XLenVT = Subtarget.getXLenVT();
3833 
3834   switch (IntNo) {
3835   default:
3836     break; // Don't custom lower most intrinsics.
3837   case Intrinsic::thread_pointer: {
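    // The thread pointer is register x4 (tp) in the RISC-V ABI.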
3838     EVT PtrVT = getPointerTy(DAG.getDataLayout());
3839     return DAG.getRegister(RISCV::X4, PtrVT);
3840   }
3841   case Intrinsic::riscv_orc_b:
3842     // Lower to the GORCI encoding for orc.b.
3843     return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
3844                        DAG.getConstant(7, DL, XLenVT));
3845   case Intrinsic::riscv_grev:
3846   case Intrinsic::riscv_gorc: {
3847     unsigned Opc =
3848         IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
3849     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3850   }
3851   case Intrinsic::riscv_shfl:
3852   case Intrinsic::riscv_unshfl: {
3853     unsigned Opc =
3854         IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
3855     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3856   }
3857   case Intrinsic::riscv_bcompress:
3858   case Intrinsic::riscv_bdecompress: {
3859     unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
3860                                                        : RISCVISD::BDECOMPRESS;
3861     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3862   }
3863   case Intrinsic::riscv_vmv_x_s:
3864     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
3865     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
3866                        Op.getOperand(1));
3867   case Intrinsic::riscv_vmv_v_x:
3868     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
3869                             Op.getSimpleValueType(), DL, DAG, Subtarget);
3870   case Intrinsic::riscv_vfmv_v_f:
3871     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
3872                        Op.getOperand(1), Op.getOperand(2));
3873   case Intrinsic::riscv_vmv_s_x: {
3874     SDValue Scalar = Op.getOperand(2);
3875 
3876     if (Scalar.getValueType().bitsLE(XLenVT)) {
3877       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
3878       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
3879                          Op.getOperand(1), Scalar, Op.getOperand(3));
3880     }
3881 
3882     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
3883 
3884     // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
3887     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
3888     // to merge element 0 from our splat into the source vector.
3889     // FIXME: This is probably not the best way to do this, but it is
3890     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
3891     // point.
3892     //   sw lo, (a0)
3893     //   sw hi, 4(a0)
3894     //   vlse vX, (a0)
3895     //
3896     //   vid.v      vVid
3897     //   vmseq.vx   mMask, vVid, 0
3898     //   vmerge.vvm vDest, vSrc, vVal, mMask
3899     MVT VT = Op.getSimpleValueType();
3900     SDValue Vec = Op.getOperand(1);
3901     SDValue VL = Op.getOperand(3);
3902 
3903     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
3904     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
3905                                       DAG.getConstant(0, DL, MVT::i32), VL);
3906 
3907     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3908     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3909     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3910     SDValue SelectCond =
3911         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
3912                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
3913     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
3914                        Vec, VL);
3915   }
3916   case Intrinsic::riscv_vslide1up:
3917   case Intrinsic::riscv_vslide1down:
3918   case Intrinsic::riscv_vslide1up_mask:
3919   case Intrinsic::riscv_vslide1down_mask: {
3920     // We need to special case these when the scalar is larger than XLen.
3921     unsigned NumOps = Op.getNumOperands();
3922     bool IsMasked = NumOps == 7;
3923     unsigned OpOffset = IsMasked ? 1 : 0;
3924     SDValue Scalar = Op.getOperand(2 + OpOffset);
3925     if (Scalar.getValueType().bitsLE(XLenVT))
3926       break;
3927 
3928     // Splatting a sign extended constant is fine.
3929     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
3930       if (isInt<32>(CVal->getSExtValue()))
3931         break;
3932 
3933     MVT VT = Op.getSimpleValueType();
3934     assert(VT.getVectorElementType() == MVT::i64 &&
3935            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3936 
3937     // Convert the vector source to the equivalent nxvXi32 vector.
3938     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3939     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3940 
3941     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3942                                    DAG.getConstant(0, DL, XLenVT));
3943     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3944                                    DAG.getConstant(1, DL, XLenVT));
3945 
3946     // Double the VL since we halved SEW.
3947     SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
3948     SDValue I32VL =
3949         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3950 
3951     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3952     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3953 
3954     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3955     // instructions.
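    // For example, an unmasked vslide1down with an i64 scalar on RV32 roughly
    // becomes (sketch; SEW is halved to 32 and VL doubled):
    //   vslide1down.vx vDst, vSrc, lo
    //   vslide1down.vx vDst, vDst, hi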
3956     if (IntNo == Intrinsic::riscv_vslide1up ||
3957         IntNo == Intrinsic::riscv_vslide1up_mask) {
3958       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3959                         I32Mask, I32VL);
3960       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
3961                         I32Mask, I32VL);
3962     } else {
3963       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
3964                         I32Mask, I32VL);
3965       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
3966                         I32Mask, I32VL);
3967     }
3968 
3969     // Convert back to nxvXi64.
3970     Vec = DAG.getBitcast(VT, Vec);
3971 
3972     if (!IsMasked)
3973       return Vec;
3974 
3975     // Apply mask after the operation.
3976     SDValue Mask = Op.getOperand(NumOps - 3);
3977     SDValue MaskedOff = Op.getOperand(1);
3978     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
3979   }
3980   }
3981 
3982   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3983 }
3984 
3985 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
3986                                                     SelectionDAG &DAG) const {
3987   unsigned IntNo = Op.getConstantOperandVal(1);
3988   switch (IntNo) {
3989   default:
3990     break;
3991   case Intrinsic::riscv_masked_strided_load: {
3992     SDLoc DL(Op);
3993     MVT XLenVT = Subtarget.getXLenVT();
3994 
3995     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3996     // the selection of the masked intrinsics doesn't do this for us.
3997     SDValue Mask = Op.getOperand(5);
3998     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3999 
4000     MVT VT = Op->getSimpleValueType(0);
4001     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4002 
4003     SDValue PassThru = Op.getOperand(2);
4004     if (!IsUnmasked) {
4005       MVT MaskVT =
4006           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4007       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4008       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4009     }
4010 
4011     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4012 
4013     SDValue IntID = DAG.getTargetConstant(
4014         IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
4015         XLenVT);
4016 
4017     auto *Load = cast<MemIntrinsicSDNode>(Op);
4018     SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
4019     if (!IsUnmasked)
4020       Ops.push_back(PassThru);
4021     Ops.push_back(Op.getOperand(3)); // Ptr
4022     Ops.push_back(Op.getOperand(4)); // Stride
4023     if (!IsUnmasked)
4024       Ops.push_back(Mask);
4025     Ops.push_back(VL);
4026     if (!IsUnmasked) {
4027       SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
4028       Ops.push_back(Policy);
4029     }
4030 
4031     SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4032     SDValue Result =
4033         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
4034                                 Load->getMemoryVT(), Load->getMemOperand());
4035     SDValue Chain = Result.getValue(1);
4036     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4037     return DAG.getMergeValues({Result, Chain}, DL);
4038   }
4039   }
4040 
4041   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
4042 }
4043 
4044 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
4045                                                  SelectionDAG &DAG) const {
4046   unsigned IntNo = Op.getConstantOperandVal(1);
4047   switch (IntNo) {
4048   default:
4049     break;
4050   case Intrinsic::riscv_masked_strided_store: {
4051     SDLoc DL(Op);
4052     MVT XLenVT = Subtarget.getXLenVT();
4053 
4054     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
4055     // the selection of the masked intrinsics doesn't do this for us.
4056     SDValue Mask = Op.getOperand(5);
4057     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4058 
4059     SDValue Val = Op.getOperand(2);
4060     MVT VT = Val.getSimpleValueType();
4061     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4062 
4063     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
4064     if (!IsUnmasked) {
4065       MVT MaskVT =
4066           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4067       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4068     }
4069 
4070     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
4071 
4072     SDValue IntID = DAG.getTargetConstant(
4073         IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
4074         XLenVT);
4075 
4076     auto *Store = cast<MemIntrinsicSDNode>(Op);
4077     SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
4078     Ops.push_back(Val);
4079     Ops.push_back(Op.getOperand(3)); // Ptr
4080     Ops.push_back(Op.getOperand(4)); // Stride
4081     if (!IsUnmasked)
4082       Ops.push_back(Mask);
4083     Ops.push_back(VL);
4084 
4085     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
4086                                    Ops, Store->getMemoryVT(),
4087                                    Store->getMemOperand());
4088   }
4089   }
4090 
4091   return SDValue();
4092 }
4093 
4094 static MVT getLMUL1VT(MVT VT) {
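  // The LMUL=1 type holds RVVBitsPerBlock/SEW elements per block; e.g. with
  // 64-bit blocks, getLMUL1VT(nxv8i32) == nxv2i32 (illustrative).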
4095   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
4096          "Unexpected vector MVT");
4097   return MVT::getScalableVectorVT(
4098       VT.getVectorElementType(),
4099       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
4100 }
4101 
4102 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
4103   switch (ISDOpcode) {
4104   default:
4105     llvm_unreachable("Unhandled reduction");
4106   case ISD::VECREDUCE_ADD:
4107     return RISCVISD::VECREDUCE_ADD_VL;
4108   case ISD::VECREDUCE_UMAX:
4109     return RISCVISD::VECREDUCE_UMAX_VL;
4110   case ISD::VECREDUCE_SMAX:
4111     return RISCVISD::VECREDUCE_SMAX_VL;
4112   case ISD::VECREDUCE_UMIN:
4113     return RISCVISD::VECREDUCE_UMIN_VL;
4114   case ISD::VECREDUCE_SMIN:
4115     return RISCVISD::VECREDUCE_SMIN_VL;
4116   case ISD::VECREDUCE_AND:
4117     return RISCVISD::VECREDUCE_AND_VL;
4118   case ISD::VECREDUCE_OR:
4119     return RISCVISD::VECREDUCE_OR_VL;
4120   case ISD::VECREDUCE_XOR:
4121     return RISCVISD::VECREDUCE_XOR_VL;
4122   }
4123 }
4124 
4125 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
4126                                                          SelectionDAG &DAG,
4127                                                          bool IsVP) const {
4128   SDLoc DL(Op);
4129   SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
4130   MVT VecVT = Vec.getSimpleValueType();
4131   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
4132           Op.getOpcode() == ISD::VECREDUCE_OR ||
4133           Op.getOpcode() == ISD::VECREDUCE_XOR ||
4134           Op.getOpcode() == ISD::VP_REDUCE_AND ||
4135           Op.getOpcode() == ISD::VP_REDUCE_OR ||
4136           Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
4137          "Unexpected reduction lowering");
4138 
4139   MVT XLenVT = Subtarget.getXLenVT();
4140   assert(Op.getValueType() == XLenVT &&
4141          "Expected reduction output to be legalized to XLenVT");
4142 
4143   MVT ContainerVT = VecVT;
4144   if (VecVT.isFixedLengthVector()) {
4145     ContainerVT = getContainerForFixedLengthVector(VecVT);
4146     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4147   }
4148 
4149   SDValue Mask, VL;
4150   if (IsVP) {
4151     Mask = Op.getOperand(2);
4152     VL = Op.getOperand(3);
4153   } else {
4154     std::tie(Mask, VL) =
4155         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4156   }
4157 
4158   unsigned BaseOpc;
4159   ISD::CondCode CC;
4160   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4161 
4162   switch (Op.getOpcode()) {
4163   default:
4164     llvm_unreachable("Unhandled reduction");
4165   case ISD::VECREDUCE_AND:
4166   case ISD::VP_REDUCE_AND: {
4167     // vpopc ~x == 0
4168     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4169     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
4170     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
4171     CC = ISD::SETEQ;
4172     BaseOpc = ISD::AND;
4173     break;
4174   }
4175   case ISD::VECREDUCE_OR:
4176   case ISD::VP_REDUCE_OR:
4177     // vpopc x != 0
4178     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
4179     CC = ISD::SETNE;
4180     BaseOpc = ISD::OR;
4181     break;
4182   case ISD::VECREDUCE_XOR:
4183   case ISD::VP_REDUCE_XOR: {
4184     // ((vpopc x) & 1) != 0
4185     SDValue One = DAG.getConstant(1, DL, XLenVT);
4186     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
4187     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
4188     CC = ISD::SETNE;
4189     BaseOpc = ISD::XOR;
4190     break;
4191   }
4192   }
4193 
4194   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
4195 
4196   if (!IsVP)
4197     return SetCC;
4198 
4199   // Now include the start value in the operation.
4200   // Note that we must return the start value when no elements are operated
4201   // upon. The vpopc instructions we've emitted in each case above will return
4202   // 0 for an inactive vector, and so we've already received the neutral value:
4203   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
4204   // can simply include the start value.
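  // For example, vp.reduce.and(start, x, mask, vl) becomes roughly
  // start & (vpopc(~x, mask, vl) == 0), and vp.reduce.xor becomes
  // start ^ ((vpopc(x, mask, vl) & 1) != 0) (sketch).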
4205   return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
4206 }
4207 
4208 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
4209                                             SelectionDAG &DAG) const {
4210   SDLoc DL(Op);
4211   SDValue Vec = Op.getOperand(0);
4212   EVT VecEVT = Vec.getValueType();
4213 
4214   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
4215 
  // Due to the ordering in LegalizeTypes, we may have a vector type that needs
  // to be split. Do that manually so we can get down to a legal type.
4218   while (getTypeAction(*DAG.getContext(), VecEVT) ==
4219          TargetLowering::TypeSplitVector) {
4220     SDValue Lo, Hi;
4221     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
4222     VecEVT = Lo.getValueType();
4223     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
4224   }
4225 
4226   // TODO: The type may need to be widened rather than split. Or widened before
4227   // it can be split.
4228   if (!isTypeLegal(VecEVT))
4229     return SDValue();
4230 
4231   MVT VecVT = VecEVT.getSimpleVT();
4232   MVT VecEltVT = VecVT.getVectorElementType();
4233   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
4234 
4235   MVT ContainerVT = VecVT;
4236   if (VecVT.isFixedLengthVector()) {
4237     ContainerVT = getContainerForFixedLengthVector(VecVT);
4238     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4239   }
4240 
4241   MVT M1VT = getLMUL1VT(ContainerVT);
4242 
4243   SDValue Mask, VL;
4244   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4245 
4246   // FIXME: This is a VLMAX splat which might be too large and can prevent
4247   // vsetvli removal.
4248   SDValue NeutralElem =
4249       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
4250   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
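  // For illustration, a VECREDUCE_ADD of i32 elements roughly selects to
  // (register names hypothetical; vsetvli bookkeeping omitted):
  //   vmv.v.i    vNeutral, 0
  //   vredsum.vs vRes, vSrc, vNeutral
  //   vmv.x.s    rd, vRes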
4251   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
4252                                   IdentitySplat, Mask, VL);
4253   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
4254                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4255   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
4256 }
4257 
4258 // Given a reduction op, this function returns the matching reduction opcode,
4259 // the vector SDValue and the scalar SDValue required to lower this to a
4260 // RISCVISD node.
4261 static std::tuple<unsigned, SDValue, SDValue>
4262 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
4263   SDLoc DL(Op);
4264   auto Flags = Op->getFlags();
4265   unsigned Opcode = Op.getOpcode();
4266   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
4267   switch (Opcode) {
4268   default:
4269     llvm_unreachable("Unhandled reduction");
4270   case ISD::VECREDUCE_FADD:
4271     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
4272                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4273   case ISD::VECREDUCE_SEQ_FADD:
4274     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
4275                            Op.getOperand(0));
4276   case ISD::VECREDUCE_FMIN:
4277     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
4278                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4279   case ISD::VECREDUCE_FMAX:
4280     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
4281                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
4282   }
4283 }
4284 
4285 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
4286                                               SelectionDAG &DAG) const {
4287   SDLoc DL(Op);
4288   MVT VecEltVT = Op.getSimpleValueType();
4289 
4290   unsigned RVVOpcode;
4291   SDValue VectorVal, ScalarVal;
4292   std::tie(RVVOpcode, VectorVal, ScalarVal) =
4293       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
4294   MVT VecVT = VectorVal.getSimpleValueType();
4295 
4296   MVT ContainerVT = VecVT;
4297   if (VecVT.isFixedLengthVector()) {
4298     ContainerVT = getContainerForFixedLengthVector(VecVT);
4299     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
4300   }
4301 
4302   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
4303 
4304   SDValue Mask, VL;
4305   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4306 
4307   // FIXME: This is a VLMAX splat which might be too large and can prevent
4308   // vsetvli removal.
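  // For illustration, a VECREDUCE_SEQ_FADD roughly selects to a vfmv.v.f of
  // the start value, a vfredosum.vs into an LMUL=1 register, and a vfmv.f.s
  // to read element 0 (sketch).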
4309   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
4310   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
4311                                   VectorVal, ScalarSplat, Mask, VL);
4312   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
4313                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4314 }
4315 
4316 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
4317   switch (ISDOpcode) {
4318   default:
4319     llvm_unreachable("Unhandled reduction");
4320   case ISD::VP_REDUCE_ADD:
4321     return RISCVISD::VECREDUCE_ADD_VL;
4322   case ISD::VP_REDUCE_UMAX:
4323     return RISCVISD::VECREDUCE_UMAX_VL;
4324   case ISD::VP_REDUCE_SMAX:
4325     return RISCVISD::VECREDUCE_SMAX_VL;
4326   case ISD::VP_REDUCE_UMIN:
4327     return RISCVISD::VECREDUCE_UMIN_VL;
4328   case ISD::VP_REDUCE_SMIN:
4329     return RISCVISD::VECREDUCE_SMIN_VL;
4330   case ISD::VP_REDUCE_AND:
4331     return RISCVISD::VECREDUCE_AND_VL;
4332   case ISD::VP_REDUCE_OR:
4333     return RISCVISD::VECREDUCE_OR_VL;
4334   case ISD::VP_REDUCE_XOR:
4335     return RISCVISD::VECREDUCE_XOR_VL;
4336   case ISD::VP_REDUCE_FADD:
4337     return RISCVISD::VECREDUCE_FADD_VL;
4338   case ISD::VP_REDUCE_SEQ_FADD:
4339     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
4340   case ISD::VP_REDUCE_FMAX:
4341     return RISCVISD::VECREDUCE_FMAX_VL;
4342   case ISD::VP_REDUCE_FMIN:
4343     return RISCVISD::VECREDUCE_FMIN_VL;
4344   }
4345 }
4346 
4347 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
4348                                            SelectionDAG &DAG) const {
4349   SDLoc DL(Op);
4350   SDValue Vec = Op.getOperand(1);
4351   EVT VecEVT = Vec.getValueType();
4352 
4353   // TODO: The type may need to be widened rather than split. Or widened before
4354   // it can be split.
4355   if (!isTypeLegal(VecEVT))
4356     return SDValue();
4357 
4358   MVT VecVT = VecEVT.getSimpleVT();
4359   MVT VecEltVT = VecVT.getVectorElementType();
4360   unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
4361 
4362   MVT ContainerVT = VecVT;
4363   if (VecVT.isFixedLengthVector()) {
4364     ContainerVT = getContainerForFixedLengthVector(VecVT);
4365     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4366   }
4367 
4368   SDValue VL = Op.getOperand(3);
4369   SDValue Mask = Op.getOperand(2);
4370 
4371   MVT M1VT = getLMUL1VT(ContainerVT);
4372   MVT XLenVT = Subtarget.getXLenVT();
4373   MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
4374 
4375   // FIXME: This is a VLMAX splat which might be too large and can prevent
4376   // vsetvli removal.
4377   SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0));
4378   SDValue Reduction =
4379       DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
4380   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
4381                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4382   if (!VecVT.isInteger())
4383     return Elt0;
4384   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
4385 }
4386 
4387 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
4388                                                    SelectionDAG &DAG) const {
4389   SDValue Vec = Op.getOperand(0);
4390   SDValue SubVec = Op.getOperand(1);
4391   MVT VecVT = Vec.getSimpleValueType();
4392   MVT SubVecVT = SubVec.getSimpleValueType();
4393 
4394   SDLoc DL(Op);
4395   MVT XLenVT = Subtarget.getXLenVT();
4396   unsigned OrigIdx = Op.getConstantOperandVal(2);
4397   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4398 
4399   // We don't have the ability to slide mask vectors up indexed by their i1
4400   // elements; the smallest we can do is i8. Often we are able to bitcast to
4401   // equivalent i8 vectors. Note that when inserting a fixed-length vector
4402   // into a scalable one, we might not necessarily have enough scalable
4403   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
4404   if (SubVecVT.getVectorElementType() == MVT::i1 &&
4405       (OrigIdx != 0 || !Vec.isUndef())) {
4406     if (VecVT.getVectorMinNumElements() >= 8 &&
4407         SubVecVT.getVectorMinNumElements() >= 8) {
4408       assert(OrigIdx % 8 == 0 && "Invalid index");
4409       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
4410              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
4411              "Unexpected mask vector lowering");
4412       OrigIdx /= 8;
4413       SubVecVT =
4414           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
4415                            SubVecVT.isScalableVector());
4416       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
4417                                VecVT.isScalableVector());
4418       Vec = DAG.getBitcast(VecVT, Vec);
4419       SubVec = DAG.getBitcast(SubVecVT, SubVec);
4420     } else {
4421       // We can't slide this mask vector up indexed by its i1 elements.
4422       // This poses a problem when we wish to insert a scalable vector which
4423       // can't be re-expressed as a larger type. Just choose the slow path and
4424       // extend to a larger type, then truncate back down.
4425       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
4426       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
4427       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
4428       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
4429       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
4430                         Op.getOperand(2));
4431       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
4432       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
4433     }
4434   }
4435 
  // If the subvector is a fixed-length type, we cannot use subregister
4437   // manipulation to simplify the codegen; we don't know which register of a
4438   // LMUL group contains the specific subvector as we only know the minimum
4439   // register size. Therefore we must slide the vector group up the full
4440   // amount.
4441   if (SubVecVT.isFixedLengthVector()) {
4442     if (OrigIdx == 0 && Vec.isUndef())
4443       return Op;
4444     MVT ContainerVT = VecVT;
4445     if (VecVT.isFixedLengthVector()) {
4446       ContainerVT = getContainerForFixedLengthVector(VecVT);
4447       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4448     }
4449     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
4450                          DAG.getUNDEF(ContainerVT), SubVec,
4451                          DAG.getConstant(0, DL, XLenVT));
4452     SDValue Mask =
4453         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
4454     // Set the vector length to only the number of elements we care about. Note
4455     // that for slideup this includes the offset.
4456     SDValue VL =
4457         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
4458     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
4459     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
4460                                   SubVec, SlideupAmt, Mask, VL);
4461     if (VecVT.isFixedLengthVector())
4462       Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
4463     return DAG.getBitcast(Op.getValueType(), Slideup);
4464   }
4465 
4466   unsigned SubRegIdx, RemIdx;
4467   std::tie(SubRegIdx, RemIdx) =
4468       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
4469           VecVT, SubVecVT, OrigIdx, TRI);
4470 
4471   RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
4472   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
4473                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
4474                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
4475 
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // that of a vector register or a multiple thereof, or the surrounding
  // elements are undef, then this is a subvector insert which naturally aligns
  // to a vector register. These can easily be handled using subregister
  // manipulation.
4480   // 2. If the subvector is smaller than a vector register, then the insertion
4481   // must preserve the undisturbed elements of the register. We do this by
4482   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
4483   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
4484   // subvector within the vector register, and an INSERT_SUBVECTOR of that
4485   // LMUL=1 type back into the larger vector (resolving to another subregister
4486   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
4487   // to avoid allocating a large register group to hold our subvector.
4488   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
4489     return Op;
4490 
  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
4492   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
4493   // (in our case undisturbed). This means we can set up a subvector insertion
4494   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
4495   // size of the subvector.
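  // For example, inserting a 4-element subvector at element offset 2 uses
  // OFFSET=2 and VL=6: elements 0..1 keep their original values, elements
  // 2..5 take the subvector, and elements past VL follow the (undisturbed)
  // tail policy (sketch).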
4496   MVT InterSubVT = VecVT;
4497   SDValue AlignedExtract = Vec;
4498   unsigned AlignedIdx = OrigIdx - RemIdx;
4499   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
4500     InterSubVT = getLMUL1VT(VecVT);
4501     // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to an EXTRACT_SUBREG instruction.
4503     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
4504                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
4505   }
4506 
4507   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
4508   // For scalable vectors this must be further multiplied by vscale.
4509   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
4510 
4511   SDValue Mask, VL;
4512   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
4513 
4514   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
4515   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
4516   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
4517   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
4518 
4519   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
4520                        DAG.getUNDEF(InterSubVT), SubVec,
4521                        DAG.getConstant(0, DL, XLenVT));
4522 
4523   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
4524                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
4525 
4526   // If required, insert this subvector back into the correct vector register.
4527   // This should resolve to an INSERT_SUBREG instruction.
4528   if (VecVT.bitsGT(InterSubVT))
4529     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
4530                           DAG.getConstant(AlignedIdx, DL, XLenVT));
4531 
4532   // We might have bitcast from a mask type: cast back to the original type if
4533   // required.
4534   return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
4535 }
4536 
4537 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
4538                                                     SelectionDAG &DAG) const {
4539   SDValue Vec = Op.getOperand(0);
4540   MVT SubVecVT = Op.getSimpleValueType();
4541   MVT VecVT = Vec.getSimpleValueType();
4542 
4543   SDLoc DL(Op);
4544   MVT XLenVT = Subtarget.getXLenVT();
4545   unsigned OrigIdx = Op.getConstantOperandVal(1);
4546   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4547 
4548   // We don't have the ability to slide mask vectors down indexed by their i1
4549   // elements; the smallest we can do is i8. Often we are able to bitcast to
4550   // equivalent i8 vectors. Note that when extracting a fixed-length vector
4551   // from a scalable one, we might not necessarily have enough scalable
4552   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
4553   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
4554     if (VecVT.getVectorMinNumElements() >= 8 &&
4555         SubVecVT.getVectorMinNumElements() >= 8) {
4556       assert(OrigIdx % 8 == 0 && "Invalid index");
4557       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
4558              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
4559              "Unexpected mask vector lowering");
4560       OrigIdx /= 8;
4561       SubVecVT =
4562           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
4563                            SubVecVT.isScalableVector());
4564       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
4565                                VecVT.isScalableVector());
4566       Vec = DAG.getBitcast(VecVT, Vec);
4567     } else {
      // We can't slide this mask vector down indexed by its i1 elements.
4569       // This poses a problem when we wish to extract a scalable vector which
4570       // can't be re-expressed as a larger type. Just choose the slow path and
4571       // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain
      // fixed-length vectors from fixed-length vectors, where we can extract
      // as i8 and shift the element right to reach the desired subvector.
4575       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
4576       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
4577       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
4578       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
4579                         Op.getOperand(1));
4580       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
4581       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
4582     }
4583   }
4584 
  // If the subvector is a fixed-length type, we cannot use subregister
4586   // manipulation to simplify the codegen; we don't know which register of a
4587   // LMUL group contains the specific subvector as we only know the minimum
4588   // register size. Therefore we must slide the vector group down the full
4589   // amount.
4590   if (SubVecVT.isFixedLengthVector()) {
4591     // With an index of 0 this is a cast-like subvector, which can be performed
4592     // with subregister operations.
4593     if (OrigIdx == 0)
4594       return Op;
4595     MVT ContainerVT = VecVT;
4596     if (VecVT.isFixedLengthVector()) {
4597       ContainerVT = getContainerForFixedLengthVector(VecVT);
4598       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4599     }
4600     SDValue Mask =
4601         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
4602     // Set the vector length to only the number of elements we care about. This
4603     // avoids sliding down elements we're going to discard straight away.
4604     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
4605     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
4606     SDValue Slidedown =
4607         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4608                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
4609     // Now we can use a cast-like subvector extract to get the result.
4610     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
4611                             DAG.getConstant(0, DL, XLenVT));
4612     return DAG.getBitcast(Op.getValueType(), Slidedown);
4613   }
4614 
4615   unsigned SubRegIdx, RemIdx;
4616   std::tie(SubRegIdx, RemIdx) =
4617       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
4618           VecVT, SubVecVT, OrigIdx, TRI);
4619 
4620   // If the Idx has been completely eliminated then this is a subvector extract
4621   // which naturally aligns to a vector register. These can easily be handled
4622   // using subregister manipulation.
4623   if (RemIdx == 0)
4624     return Op;
4625 
4626   // Else we must shift our vector register directly to extract the subvector.
4627   // Do this using VSLIDEDOWN.
4628 
4629   // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
4631   // instruction.
4632   MVT InterSubVT = VecVT;
4633   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
4634     InterSubVT = getLMUL1VT(VecVT);
4635     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
4636                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
4637   }
4638 
4639   // Slide this vector register down by the desired number of elements in order
4640   // to place the desired subvector starting at element 0.
4641   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
4642   // For scalable vectors this must be further multiplied by vscale.
4643   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
4644 
4645   SDValue Mask, VL;
4646   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
4647   SDValue Slidedown =
4648       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
4649                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
4650 
4651   // Now the vector is in the right position, extract our final subvector. This
4652   // should resolve to a COPY.
4653   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
4654                           DAG.getConstant(0, DL, XLenVT));
4655 
4656   // We might have bitcast from a mask type: cast back to the original type if
4657   // required.
4658   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
4659 }
4660 
4661 // Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
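// For example, a step of 4 becomes a vid.v followed by a shift left by 2,
// while a non-power-of-two step multiplies the vid result by a splat of the
// step value (sketch).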
4663 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
4664                                               SelectionDAG &DAG) const {
4665   SDLoc DL(Op);
4666   MVT VT = Op.getSimpleValueType();
4667   MVT XLenVT = Subtarget.getXLenVT();
4668   SDValue Mask, VL;
4669   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
4670   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
4671   uint64_t StepValImm = Op.getConstantOperandVal(0);
4672   if (StepValImm != 1) {
4673     if (isPowerOf2_64(StepValImm)) {
4674       SDValue StepVal =
4675           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
4676                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
4677       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
4678     } else {
4679       SDValue StepVal = lowerScalarSplat(
4680           DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
4681           DL, DAG, Subtarget);
4682       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
4683     }
4684   }
4685   return StepVec;
4686 }
4687 
4688 // Implement vector_reverse using vrgather.vv with indices determined by
4689 // subtracting the id of each element from (VLMAX-1). This will convert
4690 // the indices like so:
4691 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
4692 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
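// For illustration: generate indices with vid.v, subtract them from a splat
// of VLMAX-1 (typically selected as vrsub.vx), then permute with vrgather.vv
// (sketch).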
4693 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
4694                                                  SelectionDAG &DAG) const {
4695   SDLoc DL(Op);
4696   MVT VecVT = Op.getSimpleValueType();
4697   unsigned EltSize = VecVT.getScalarSizeInBits();
4698   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
4699 
4700   unsigned MaxVLMAX = 0;
4701   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
4702   if (VectorBitsMax != 0)
4703     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
4704 
4705   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
4706   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
4707 
4708   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
4709   // to use vrgatherei16.vv.
4710   // TODO: It's also possible to use vrgatherei16.vv for other types to
4711   // decrease register width for the index calculation.
4712   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
4714     // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting, VLMAX no longer
    // requires vrgatherei16.vv.
4717     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
4718       SDValue Lo, Hi;
4719       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
4720       EVT LoVT, HiVT;
4721       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
4722       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
4723       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
4724       // Reassemble the low and high pieces reversed.
4725       // FIXME: This is a CONCAT_VECTORS.
4726       SDValue Res =
4727           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
4728                       DAG.getIntPtrConstant(0, DL));
4729       return DAG.getNode(
4730           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
4731           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
4732     }
4733 
4734     // Just promote the int type to i16 which will double the LMUL.
4735     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
4736     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
4737   }
4738 
4739   MVT XLenVT = Subtarget.getXLenVT();
4740   SDValue Mask, VL;
4741   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
4742 
4743   // Calculate VLMAX-1 for the desired SEW.
4744   unsigned MinElts = VecVT.getVectorMinNumElements();
4745   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
4746                               DAG.getConstant(MinElts, DL, XLenVT));
4747   SDValue VLMinus1 =
4748       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
4749 
4750   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
4751   bool IsRV32E64 =
4752       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
4753   SDValue SplatVL;
4754   if (!IsRV32E64)
4755     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
4756   else
4757     SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
4758 
4759   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
4760   SDValue Indices =
4761       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
4762 
4763   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
4764 }
4765 
4766 SDValue
4767 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
4768                                                      SelectionDAG &DAG) const {
4769   SDLoc DL(Op);
4770   auto *Load = cast<LoadSDNode>(Op);
4771 
4772   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4773                                         Load->getMemoryVT(),
4774                                         *Load->getMemOperand()) &&
4775          "Expecting a correctly-aligned load");
4776 
4777   MVT VT = Op.getSimpleValueType();
4778   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4779 
4780   SDValue VL =
4781       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4782 
4783   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4784   SDValue NewLoad = DAG.getMemIntrinsicNode(
4785       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
4786       Load->getMemoryVT(), Load->getMemOperand());
4787 
4788   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4789   return DAG.getMergeValues({Result, Load->getChain()}, DL);
4790 }
4791 
4792 SDValue
4793 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
4794                                                       SelectionDAG &DAG) const {
4795   SDLoc DL(Op);
4796   auto *Store = cast<StoreSDNode>(Op);
4797 
4798   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4799                                         Store->getMemoryVT(),
4800                                         *Store->getMemOperand()) &&
4801          "Expecting a correctly-aligned store");
4802 
4803   SDValue StoreVal = Store->getValue();
4804   MVT VT = StoreVal.getSimpleValueType();
4805 
  // If the size is less than a byte, we need to pad with zeros to make a byte.
4807   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
4808     VT = MVT::v8i1;
4809     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
4810                            DAG.getConstant(0, DL, VT), StoreVal,
4811                            DAG.getIntPtrConstant(0, DL));
4812   }
4813 
4814   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4815 
4816   SDValue VL =
4817       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4818 
4819   SDValue NewValue =
4820       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
4821   return DAG.getMemIntrinsicNode(
4822       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
4823       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
4824       Store->getMemoryVT(), Store->getMemOperand());
4825 }
4826 
4827 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
4828                                              SelectionDAG &DAG) const {
4829   SDLoc DL(Op);
4830   MVT VT = Op.getSimpleValueType();
4831 
4832   const auto *MemSD = cast<MemSDNode>(Op);
4833   EVT MemVT = MemSD->getMemoryVT();
4834   MachineMemOperand *MMO = MemSD->getMemOperand();
4835   SDValue Chain = MemSD->getChain();
4836   SDValue BasePtr = MemSD->getBasePtr();
4837 
4838   SDValue Mask, PassThru, VL;
4839   if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
4840     Mask = VPLoad->getMask();
4841     PassThru = DAG.getUNDEF(VT);
4842     VL = VPLoad->getVectorLength();
4843   } else {
4844     const auto *MLoad = cast<MaskedLoadSDNode>(Op);
4845     Mask = MLoad->getMask();
4846     PassThru = MLoad->getPassThru();
4847   }
4848 
4849   MVT XLenVT = Subtarget.getXLenVT();
4850 
4851   MVT ContainerVT = VT;
4852   if (VT.isFixedLengthVector()) {
4853     ContainerVT = getContainerForFixedLengthVector(VT);
4854     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4855 
4856     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4857     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4858   }
4859 
4860   if (!VL)
4861     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
4862 
4863   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4864   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
4865   SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
4866   SDValue Ops[] = {Chain, IntID, PassThru, BasePtr, Mask, VL, Policy};
4867   SDValue Result =
4868       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
4869   Chain = Result.getValue(1);
4870 
4871   if (VT.isFixedLengthVector())
4872     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4873 
4874   return DAG.getMergeValues({Result, Chain}, DL);
4875 }
4876 
4877 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
4878                                               SelectionDAG &DAG) const {
4879   SDLoc DL(Op);
4880 
4881   const auto *MemSD = cast<MemSDNode>(Op);
4882   EVT MemVT = MemSD->getMemoryVT();
4883   MachineMemOperand *MMO = MemSD->getMemOperand();
4884   SDValue Chain = MemSD->getChain();
4885   SDValue BasePtr = MemSD->getBasePtr();
4886   SDValue Val, Mask, VL;
4887 
4888   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
4889     Val = VPStore->getValue();
4890     Mask = VPStore->getMask();
4891     VL = VPStore->getVectorLength();
4892   } else {
4893     const auto *MStore = cast<MaskedStoreSDNode>(Op);
4894     Val = MStore->getValue();
4895     Mask = MStore->getMask();
4896   }
4897 
4898   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4899 
4900   MVT VT = Val.getSimpleValueType();
4901   MVT XLenVT = Subtarget.getXLenVT();
4902 
4903   MVT ContainerVT = VT;
4904   if (VT.isFixedLengthVector()) {
4905     ContainerVT = getContainerForFixedLengthVector(VT);
4906 
4907     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
4908     if (!IsUnmasked) {
4909       MVT MaskVT =
4910           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4911       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4912     }
4913   }
4914 
4915   if (!VL)
4916     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
4917 
4918   unsigned IntID =
4919       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
4920   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
4921   Ops.push_back(Val);
4922   Ops.push_back(BasePtr);
4923   if (!IsUnmasked)
4924     Ops.push_back(Mask);
4925   Ops.push_back(VL);
4926 
4927   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
4928                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
4929 }
4930 
4931 SDValue
4932 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
4933                                                       SelectionDAG &DAG) const {
4934   MVT InVT = Op.getOperand(0).getSimpleValueType();
4935   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
4936 
4937   MVT VT = Op.getSimpleValueType();
4938 
4939   SDValue Op1 =
4940       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4941   SDValue Op2 =
4942       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
4943 
4944   SDLoc DL(Op);
4945   SDValue VL =
4946       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
4947 
4948   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4949   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
4950 
4951   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
4952                             Op.getOperand(2), Mask, VL);
4953 
4954   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
4955 }
4956 
4957 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
4958     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
4959   MVT VT = Op.getSimpleValueType();
4960 
4961   if (VT.getVectorElementType() == MVT::i1)
4962     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
4963 
4964   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
4965 }
4966 
4967 SDValue
4968 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
4969                                                       SelectionDAG &DAG) const {
4970   unsigned Opc;
4971   switch (Op.getOpcode()) {
4972   default: llvm_unreachable("Unexpected opcode!");
4973   case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
4974   case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
4975   case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
4976   }
4977 
4978   return lowerToScalableOp(Op, DAG, Opc);
4979 }
4980 
4981 // Lower vector ABS to smax(X, sub(0, X)).
4982 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
4983   SDLoc DL(Op);
4984   MVT VT = Op.getSimpleValueType();
4985   SDValue X = Op.getOperand(0);
4986 
4987   assert(VT.isFixedLengthVector() && "Unexpected type");
4988 
4989   MVT ContainerVT = getContainerForFixedLengthVector(VT);
4990   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
4991 
4992   SDValue Mask, VL;
4993   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4994 
4995   SDValue SplatZero =
4996       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4997                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
4998   SDValue NegX =
4999       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
5000   SDValue Max =
5001       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
5002 
5003   return convertFromScalableVector(VT, Max, DAG, Subtarget);
5004 }
5005 
5006 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
5007     SDValue Op, SelectionDAG &DAG) const {
5008   SDLoc DL(Op);
5009   MVT VT = Op.getSimpleValueType();
5010   SDValue Mag = Op.getOperand(0);
5011   SDValue Sign = Op.getOperand(1);
5012   assert(Mag.getValueType() == Sign.getValueType() &&
5013          "Can only handle COPYSIGN with matching types.");
5014 
5015   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5016   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
5017   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
5018 
5019   SDValue Mask, VL;
5020   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5021 
5022   SDValue CopySign =
5023       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
5024 
5025   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
5026 }
5027 
5028 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
5029     SDValue Op, SelectionDAG &DAG) const {
5030   MVT VT = Op.getSimpleValueType();
5031   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5032 
5033   MVT I1ContainerVT =
5034       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5035 
5036   SDValue CC =
5037       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
5038   SDValue Op1 =
5039       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5040   SDValue Op2 =
5041       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
5042 
5043   SDLoc DL(Op);
5044   SDValue Mask, VL;
5045   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5046 
5047   SDValue Select =
5048       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
5049 
5050   return convertFromScalableVector(VT, Select, DAG, Subtarget);
5051 }
5052 
5053 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
5054                                                unsigned NewOpc,
5055                                                bool HasMask) const {
5056   MVT VT = Op.getSimpleValueType();
5057   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5058 
5059   // Create list of operands by converting existing ones to scalable types.
5060   SmallVector<SDValue, 6> Ops;
5061   for (const SDValue &V : Op->op_values()) {
5062     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5063 
5064     // Pass through non-vector operands.
5065     if (!V.getValueType().isVector()) {
5066       Ops.push_back(V);
5067       continue;
5068     }
5069 
5070     // "cast" fixed length vector to a scalable vector.
5071     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
5072            "Only fixed length vectors are supported!");
5073     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5074   }
5075 
5076   SDLoc DL(Op);
5077   SDValue Mask, VL;
5078   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5079   if (HasMask)
5080     Ops.push_back(Mask);
5081   Ops.push_back(VL);
5082 
5083   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
5084   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
5085 }
5086 
5087 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
5088 // * Operands of each node are assumed to be in the same order.
5089 // * The EVL operand is promoted from i32 to i64 on RV64.
5090 // * Fixed-length vectors are converted to their scalable-vector container
5091 //   types.
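// For example, when RISCVISDOpc is RISCVISD::ADD_VL, a VP add on v4i32
// operands is rebuilt as an ADD_VL node on the scalable container type and
// the result is converted back to v4i32.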
5092 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
5093                                        unsigned RISCVISDOpc) const {
5094   SDLoc DL(Op);
5095   MVT VT = Op.getSimpleValueType();
5096   SmallVector<SDValue, 4> Ops;
5097 
5098   for (const auto &OpIdx : enumerate(Op->ops())) {
5099     SDValue V = OpIdx.value();
5100     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5101     // Pass through operands which aren't fixed-length vectors.
5102     if (!V.getValueType().isFixedLengthVector()) {
5103       Ops.push_back(V);
5104       continue;
5105     }
5106     // "cast" fixed length vector to a scalable vector.
5107     MVT OpVT = V.getSimpleValueType();
5108     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
5109     assert(useRVVForFixedLengthVectorVT(OpVT) &&
5110            "Only fixed length vectors are supported!");
5111     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
5112   }
5113 
5114   if (!VT.isFixedLengthVector())
5115     return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
5116 
5117   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5118 
5119   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
5120 
5121   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
5122 }
5123 
5124 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
5125 // matched to an RVV indexed load. The RVV indexed load instructions only
5126 // support the "unsigned unscaled" addressing mode; indices are implicitly
5127 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
5128 // signed or scaled indexing is extended to the XLEN value type and scaled
5129 // accordingly.
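// For example, a v4i32 gather with v4i32 indices is lowered to the
// riscv_vluxei (or riscv_vluxei_mask) intrinsic on the scalable container
// type, and the result is converted back to v4i32.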
5130 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
5131                                                SelectionDAG &DAG) const {
5132   SDLoc DL(Op);
5133   MVT VT = Op.getSimpleValueType();
5134 
5135   const auto *MemSD = cast<MemSDNode>(Op.getNode());
5136   EVT MemVT = MemSD->getMemoryVT();
5137   MachineMemOperand *MMO = MemSD->getMemOperand();
5138   SDValue Chain = MemSD->getChain();
5139   SDValue BasePtr = MemSD->getBasePtr();
5140 
5141   ISD::LoadExtType LoadExtType;
5142   SDValue Index, Mask, PassThru, VL;
5143 
5144   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
5145     Index = VPGN->getIndex();
5146     Mask = VPGN->getMask();
5147     PassThru = DAG.getUNDEF(VT);
5148     VL = VPGN->getVectorLength();
5149     // VP doesn't support extending loads.
5150     LoadExtType = ISD::NON_EXTLOAD;
5151   } else {
5152     // Otherwise it must be an MGATHER.
5153     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
5154     Index = MGN->getIndex();
5155     Mask = MGN->getMask();
5156     PassThru = MGN->getPassThru();
5157     LoadExtType = MGN->getExtensionType();
5158   }
5159 
5160   MVT IndexVT = Index.getSimpleValueType();
5161   MVT XLenVT = Subtarget.getXLenVT();
5162 
5163   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
5164          "Unexpected VTs!");
5165   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
5166   // Targets have to explicitly opt in to extending vector loads.
5167   assert(LoadExtType == ISD::NON_EXTLOAD &&
5168          "Unexpected extending MGATHER/VP_GATHER");
5169   (void)LoadExtType;
5170 
5171   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5172   // the selection of the masked intrinsics doesn't do this for us.
5173   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5174 
5175   MVT ContainerVT = VT;
5176   if (VT.isFixedLengthVector()) {
5177     // We need to use the larger of the result and index type to determine the
5178     // scalable type to use so we don't increase LMUL for any operand/result.
5179     if (VT.bitsGE(IndexVT)) {
5180       ContainerVT = getContainerForFixedLengthVector(VT);
5181       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
5182                                  ContainerVT.getVectorElementCount());
5183     } else {
5184       IndexVT = getContainerForFixedLengthVector(IndexVT);
5185       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
5186                                      IndexVT.getVectorElementCount());
5187     }
5188 
5189     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
5190 
5191     if (!IsUnmasked) {
5192       MVT MaskVT =
5193           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5194       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5195       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5196     }
5197   }
5198 
5199   if (!VL)
5200     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5201 
5202   unsigned IntID =
5203       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
5204   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5205   if (!IsUnmasked)
5206     Ops.push_back(PassThru);
5207   Ops.push_back(BasePtr);
5208   Ops.push_back(Index);
5209   if (!IsUnmasked)
5210     Ops.push_back(Mask);
5211   Ops.push_back(VL);
5212   if (!IsUnmasked)
5213     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
5214 
5215   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5216   SDValue Result =
5217       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
5218   Chain = Result.getValue(1);
5219 
5220   if (VT.isFixedLengthVector())
5221     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
5222 
5223   return DAG.getMergeValues({Result, Chain}, DL);
5224 }
5225 
5226 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
5227 // matched to an RVV indexed store. The RVV indexed store instructions only
5228 // support the "unsigned unscaled" addressing mode; indices are implicitly
5229 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
5230 // signed or scaled indexing is extended to the XLEN value type and scaled
5231 // accordingly.
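// For example, a v4i32 scatter with v4i32 indices is lowered to the
// riscv_vsoxei (or riscv_vsoxei_mask) intrinsic, with both the stored value
// and the indices converted to their scalable container types.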
5232 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
5233                                                 SelectionDAG &DAG) const {
5234   SDLoc DL(Op);
5235   const auto *MemSD = cast<MemSDNode>(Op.getNode());
5236   EVT MemVT = MemSD->getMemoryVT();
5237   MachineMemOperand *MMO = MemSD->getMemOperand();
5238   SDValue Chain = MemSD->getChain();
5239   SDValue BasePtr = MemSD->getBasePtr();
5240 
5241   bool IsTruncatingStore = false;
5242   SDValue Index, Mask, Val, VL;
5243 
5244   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
5245     Index = VPSN->getIndex();
5246     Mask = VPSN->getMask();
5247     Val = VPSN->getValue();
5248     VL = VPSN->getVectorLength();
5249     // VP doesn't support truncating stores.
5250     IsTruncatingStore = false;
5251   } else {
5252     // Otherwise it must be an MSCATTER.
5253     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
5254     Index = MSN->getIndex();
5255     Mask = MSN->getMask();
5256     Val = MSN->getValue();
5257     IsTruncatingStore = MSN->isTruncatingStore();
5258   }
5259 
5260   MVT VT = Val.getSimpleValueType();
5261   MVT IndexVT = Index.getSimpleValueType();
5262   MVT XLenVT = Subtarget.getXLenVT();
5263 
5264   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
5265          "Unexpected VTs!");
5266   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
5267   // Targets have to explicitly opt in to extending vector loads and
5268   // truncating vector stores.
5269   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
5270   (void)IsTruncatingStore;
5271 
5272   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5273   // the selection of the masked intrinsics doesn't do this for us.
5274   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5275 
5276   MVT ContainerVT = VT;
5277   if (VT.isFixedLengthVector()) {
5278     // We need to use the larger of the value and index type to determine the
5279     // scalable type to use so we don't increase LMUL for any operand/result.
5280     if (VT.bitsGE(IndexVT)) {
5281       ContainerVT = getContainerForFixedLengthVector(VT);
5282       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
5283                                  ContainerVT.getVectorElementCount());
5284     } else {
5285       IndexVT = getContainerForFixedLengthVector(IndexVT);
5286       ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
5287                                      IndexVT.getVectorElementCount());
5288     }
5289 
5290     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
5291     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5292 
5293     if (!IsUnmasked) {
5294       MVT MaskVT =
5295           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5296       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5297     }
5298   }
5299 
5300   if (!VL)
5301     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5302 
5303   unsigned IntID =
5304       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
5305   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5306   Ops.push_back(Val);
5307   Ops.push_back(BasePtr);
5308   Ops.push_back(Index);
5309   if (!IsUnmasked)
5310     Ops.push_back(Mask);
5311   Ops.push_back(VL);
5312 
5313   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
5314                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
5315 }
5316 
5317 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
5318                                                SelectionDAG &DAG) const {
5319   const MVT XLenVT = Subtarget.getXLenVT();
5320   SDLoc DL(Op);
5321   SDValue Chain = Op->getOperand(0);
5322   SDValue SysRegNo = DAG.getConstant(
5323       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5324   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
5325   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
5326 
5327   // The rounding mode encoding used by RISCV differs from the one used by
5328   // FLT_ROUNDS. To convert between them, the RISCV rounding mode is used as
5329   // an index into a table, which consists of a sequence of 4-bit fields, each
5330   // holding the corresponding FLT_ROUNDS mode.
5331   static const int Table =
5332       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
5333       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
5334       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
5335       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
5336       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
5337 
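  // Field i of the table (bits [4*i+3 : 4*i]) holds the FLT_ROUNDS value for
  // RISCV rounding mode i, so shift the table right by 4*FRM and keep the low
  // 3 bits.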
5338   SDValue Shift =
5339       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
5340   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5341                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5342   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5343                                DAG.getConstant(7, DL, XLenVT));
5344 
5345   return DAG.getMergeValues({Masked, Chain}, DL);
5346 }
5347 
5348 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
5349                                                SelectionDAG &DAG) const {
5350   const MVT XLenVT = Subtarget.getXLenVT();
5351   SDLoc DL(Op);
5352   SDValue Chain = Op->getOperand(0);
5353   SDValue RMValue = Op->getOperand(1);
5354   SDValue SysRegNo = DAG.getConstant(
5355       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
5356 
5357   // The rounding mode encoding used by RISCV differs from the one used by
5358   // FLT_ROUNDS. To convert between them, the C rounding mode is used as an
5359   // index into a table, which consists of a sequence of 4-bit fields, each
5360   // holding the corresponding RISCV rounding mode.
5361   static const unsigned Table =
5362       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
5363       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
5364       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
5365       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
5366       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
5367 
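  // Field i of the table (bits [4*i+3 : 4*i]) holds the RISCV rounding mode
  // for FLT_ROUNDS value i, so shift the table right by 4*RMValue, keep the
  // low 3 bits, and write the result to the FRM CSR.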
5368   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
5369                               DAG.getConstant(2, DL, XLenVT));
5370   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
5371                                 DAG.getConstant(Table, DL, XLenVT), Shift);
5372   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
5373                         DAG.getConstant(0x7, DL, XLenVT));
5374   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
5375                      RMValue);
5376 }
5377 
5378 // Returns the opcode of the target-specific SDNode that implements the 32-bit
5379 // form of the given Opcode.
5380 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
5381   switch (Opcode) {
5382   default:
5383     llvm_unreachable("Unexpected opcode");
5384   case ISD::SHL:
5385     return RISCVISD::SLLW;
5386   case ISD::SRA:
5387     return RISCVISD::SRAW;
5388   case ISD::SRL:
5389     return RISCVISD::SRLW;
5390   case ISD::SDIV:
5391     return RISCVISD::DIVW;
5392   case ISD::UDIV:
5393     return RISCVISD::DIVUW;
5394   case ISD::UREM:
5395     return RISCVISD::REMUW;
5396   case ISD::ROTL:
5397     return RISCVISD::ROLW;
5398   case ISD::ROTR:
5399     return RISCVISD::RORW;
5400   case RISCVISD::GREV:
5401     return RISCVISD::GREVW;
5402   case RISCVISD::GORC:
5403     return RISCVISD::GORCW;
5404   }
5405 }
5406 
5407 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5408 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
5409 // otherwise be promoted to i64, making it difficult to select the
5410 // SLLW/DIVUW/.../*W instructions later on because the fact that the operation
5411 // was originally of type i8/i16/i32 is lost.
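// For example, an i32 SRA on RV64 becomes
//   (trunc i32 (RISCVISD::SRAW i64 (any_ext x), (any_ext y))).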
5412 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
5413                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
5414   SDLoc DL(N);
5415   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
5416   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5417   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5418   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5419   // ReplaceNodeResults requires we maintain the same type for the return value.
5420   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5421 }
5422 
5423 // Converts the given 32-bit operation to an i64 operation with sign extension
5424 // semantics in order to reduce the number of sign extension instructions.
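// For example, an i32 ADD becomes
//   (trunc (sign_extend_inreg (add i64 (any_ext x), (any_ext y)), i32)),
// which can then typically be selected as a single addw.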
5425 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
5426   SDLoc DL(N);
5427   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5428   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5429   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5430   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5431                                DAG.getValueType(MVT::i32));
5432   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5433 }
5434 
5435 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
5436                                              SmallVectorImpl<SDValue> &Results,
5437                                              SelectionDAG &DAG) const {
5438   SDLoc DL(N);
5439   switch (N->getOpcode()) {
5440   default:
5441     llvm_unreachable("Don't know how to custom type legalize this operation!");
5442   case ISD::STRICT_FP_TO_SINT:
5443   case ISD::STRICT_FP_TO_UINT:
5444   case ISD::FP_TO_SINT:
5445   case ISD::FP_TO_UINT: {
5446     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5447            "Unexpected custom legalisation");
5448     bool IsStrict = N->isStrictFPOpcode();
5449     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
5450                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
5451     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
5452     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
5453         TargetLowering::TypeSoftenFloat) {
5454       // FIXME: Support strict FP.
5455       if (IsStrict)
5456         return;
5457       if (!isTypeLegal(Op0.getValueType()))
5458         return;
5459       unsigned Opc =
5460           IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
5461       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
5462       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5463       return;
5464     }
5465     // If the FP type needs to be softened, emit a library call using the 'si'
5466     // version. If we left it to default legalization we'd end up with 'di'. If
5467     // the FP type doesn't need to be softened just let generic type
5468     // legalization promote the result type.
5469     RTLIB::Libcall LC;
5470     if (IsSigned)
5471       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
5472     else
5473       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
5474     MakeLibCallOptions CallOptions;
5475     EVT OpVT = Op0.getValueType();
5476     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
5477     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
5478     SDValue Result;
5479     std::tie(Result, Chain) =
5480         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
5481     Results.push_back(Result);
5482     if (IsStrict)
5483       Results.push_back(Chain);
5484     break;
5485   }
5486   case ISD::READCYCLECOUNTER: {
5487     assert(!Subtarget.is64Bit() &&
5488            "READCYCLECOUNTER only has custom type legalization on riscv32");
5489 
5490     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
5491     SDValue RCW =
5492         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
5493 
5494     Results.push_back(
5495         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
5496     Results.push_back(RCW.getValue(2));
5497     break;
5498   }
5499   case ISD::MUL: {
5500     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
5501     unsigned XLen = Subtarget.getXLen();
5502     // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
5503     if (Size > XLen) {
5504       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
5505       SDValue LHS = N->getOperand(0);
5506       SDValue RHS = N->getOperand(1);
5507       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
5508 
5509       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
5510       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
5511       // We need exactly one side to be unsigned.
5512       if (LHSIsU == RHSIsU)
5513         return;
5514 
5515       auto MakeMULPair = [&](SDValue S, SDValue U) {
5516         MVT XLenVT = Subtarget.getXLenVT();
5517         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
5518         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
5519         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
5520         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
5521         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
5522       };
5523 
5524       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
5525       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
5526 
5527       // The other operand should be signed, but still prefer MULH when
5528       // possible.
5529       if (RHSIsU && LHSIsS && !RHSIsS)
5530         Results.push_back(MakeMULPair(LHS, RHS));
5531       else if (LHSIsU && RHSIsS && !LHSIsS)
5532         Results.push_back(MakeMULPair(RHS, LHS));
5533 
5534       return;
5535     }
5536     LLVM_FALLTHROUGH;
5537   }
5538   case ISD::ADD:
5539   case ISD::SUB:
5540     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5541            "Unexpected custom legalisation");
5542     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5543     break;
5544   case ISD::SHL:
5545   case ISD::SRA:
5546   case ISD::SRL:
5547     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5548            "Unexpected custom legalisation");
5549     if (N->getOperand(1).getOpcode() != ISD::Constant) {
5550       Results.push_back(customLegalizeToWOp(N, DAG));
5551       break;
5552     }
5553 
5554     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
5555     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
5556     // shift amount.
5557     if (N->getOpcode() == ISD::SHL) {
5558       SDLoc DL(N);
5559       SDValue NewOp0 =
5560           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5561       SDValue NewOp1 =
5562           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
5563       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
5564       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5565                                    DAG.getValueType(MVT::i32));
5566       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
5567     }
5568 
5569     break;
5570   case ISD::ROTL:
5571   case ISD::ROTR:
5572     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5573            "Unexpected custom legalisation");
5574     Results.push_back(customLegalizeToWOp(N, DAG));
5575     break;
5576   case ISD::CTTZ:
5577   case ISD::CTTZ_ZERO_UNDEF:
5578   case ISD::CTLZ:
5579   case ISD::CTLZ_ZERO_UNDEF: {
5580     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5581            "Unexpected custom legalisation");
5582 
5583     SDValue NewOp0 =
5584         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5585     bool IsCTZ =
5586         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
5587     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
5588     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
5589     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5590     return;
5591   }
5592   case ISD::SDIV:
5593   case ISD::UDIV:
5594   case ISD::UREM: {
5595     MVT VT = N->getSimpleValueType(0);
5596     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
5597            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
5598            "Unexpected custom legalisation");
5599     // Don't promote division/remainder by a constant since we should expand
5600     // those to a multiply by a magic constant.
5601     // FIXME: What if the expansion is disabled for minsize?
5602     if (N->getOperand(1).getOpcode() == ISD::Constant)
5603       return;
5604 
5605     // If the input is i32, use ANY_EXTEND since the W instructions don't read
5606     // the upper 32 bits. For other types we need to sign or zero extend
5607     // based on the opcode.
5608     unsigned ExtOpc = ISD::ANY_EXTEND;
5609     if (VT != MVT::i32)
5610       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
5611                                            : ISD::ZERO_EXTEND;
5612 
5613     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
5614     break;
5615   }
5616   case ISD::UADDO:
5617   case ISD::USUBO: {
5618     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5619            "Unexpected custom legalisation");
5620     bool IsAdd = N->getOpcode() == ISD::UADDO;
5621     // Create an ADDW or SUBW.
5622     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5623     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5624     SDValue Res =
5625         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5626     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
5627                       DAG.getValueType(MVT::i32));
5628 
5629     // Sign extend the LHS and perform an unsigned compare with the ADDW result.
5630     // Since the inputs are sign extended from i32, this is equivalent to
5631     // comparing the lower 32 bits.
5632     LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
5633     SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
5634                                     IsAdd ? ISD::SETULT : ISD::SETUGT);
5635 
5636     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5637     Results.push_back(Overflow);
5638     return;
5639   }
5640   case ISD::UADDSAT:
5641   case ISD::USUBSAT: {
5642     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5643            "Unexpected custom legalisation");
5644     if (Subtarget.hasStdExtZbb()) {
5645       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5646       // sign extend allows overflow of the lower 32 bits to be detected on
5647       // the promoted size.
5648       SDValue LHS =
5649           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
5650       SDValue RHS =
5651           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
5652       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
5653       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5654       return;
5655     }
5656 
5657     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
5658     // promotion for UADDO/USUBO.
5659     Results.push_back(expandAddSubSat(N, DAG));
5660     return;
5661   }
5662   case ISD::BITCAST: {
5663     EVT VT = N->getValueType(0);
5664     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
5665     SDValue Op0 = N->getOperand(0);
5666     EVT Op0VT = Op0.getValueType();
5667     MVT XLenVT = Subtarget.getXLenVT();
5668     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
5669       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
5670       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
5671     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
5672                Subtarget.hasStdExtF()) {
5673       SDValue FPConv =
5674           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
5675       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
5676     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
5677                isTypeLegal(Op0VT)) {
5678       // Custom-legalize bitcasts from fixed-length vector types to illegal
5679       // scalar types in order to improve codegen. Bitcast the vector to a
5680       // one-element vector type whose element type is the same as the result
5681       // type, and extract the first element.
5682       LLVMContext &Context = *DAG.getContext();
5683       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
5684       Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
5685                                     DAG.getConstant(0, DL, XLenVT)));
5686     }
5687     break;
5688   }
5689   case RISCVISD::GREV:
5690   case RISCVISD::GORC: {
5691     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5692            "Unexpected custom legalisation");
5693     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
5694     // This is similar to customLegalizeToWOp: any-extend both operands to i64
5695     // and create the corresponding W node. The second operand is known to be a
5696     // constant (asserted above).
5697     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
5698     SDValue NewOp0 =
5699         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5700     SDValue NewOp1 =
5701         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5702     SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5703     // ReplaceNodeResults requires we maintain the same type for the return
5704     // value.
5705     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
5706     break;
5707   }
5708   case RISCVISD::SHFL: {
5709     // There is no SHFLIW instruction, but we can just promote the operation.
5710     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5711            "Unexpected custom legalisation");
5712     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
5713     SDValue NewOp0 =
5714         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5715     SDValue NewOp1 =
5716         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5717     SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
5718     // ReplaceNodeResults requires we maintain the same type for the return
5719     // value.
5720     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
5721     break;
5722   }
5723   case ISD::BSWAP:
5724   case ISD::BITREVERSE: {
5725     MVT VT = N->getSimpleValueType(0);
5726     MVT XLenVT = Subtarget.getXLenVT();
5727     assert((VT == MVT::i8 || VT == MVT::i16 ||
5728             (VT == MVT::i32 && Subtarget.is64Bit())) &&
5729            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
5730     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
5731     unsigned Imm = VT.getSizeInBits() - 1;
5732     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
5733     if (N->getOpcode() == ISD::BSWAP)
5734       Imm &= ~0x7U;
5735     unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
5736     SDValue GREVI =
5737         DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
5738     // ReplaceNodeResults requires we maintain the same type for the return
5739     // value.
5740     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
5741     break;
5742   }
5743   case ISD::FSHL:
5744   case ISD::FSHR: {
5745     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5746            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
5747     SDValue NewOp0 =
5748         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5749     SDValue NewOp1 =
5750         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5751     SDValue NewOp2 =
5752         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5753     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
5754     // Mask the shift amount to 5 bits.
5755     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
5756                          DAG.getConstant(0x1f, DL, MVT::i64));
5757     unsigned Opc =
5758         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
5759     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
5760     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
5761     break;
5762   }
5763   case ISD::EXTRACT_VECTOR_ELT: {
5764     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW
5765     // element type is illegal (currently only vXi64 on RV32).
5766     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
5767     // transferred to the destination register. We issue two of these from the
5768     // upper and lower halves of the SEW-bit vector element, slid down to the
5769     // first element.
5770     SDValue Vec = N->getOperand(0);
5771     SDValue Idx = N->getOperand(1);
5772 
5773     // The vector type hasn't been legalized yet so we can't issue target
5774     // specific nodes if it needs legalization.
5775     // FIXME: We would manually legalize if it's important.
5776     if (!isTypeLegal(Vec.getValueType()))
5777       return;
5778 
5779     MVT VecVT = Vec.getSimpleValueType();
5780 
5781     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
5782            VecVT.getVectorElementType() == MVT::i64 &&
5783            "Unexpected EXTRACT_VECTOR_ELT legalization");
5784 
5785     // If this is a fixed vector, we need to convert it to a scalable vector.
5786     MVT ContainerVT = VecVT;
5787     if (VecVT.isFixedLengthVector()) {
5788       ContainerVT = getContainerForFixedLengthVector(VecVT);
5789       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5790     }
5791 
5792     MVT XLenVT = Subtarget.getXLenVT();
5793 
5794     // Use a VL of 1 to avoid processing more elements than we need.
5795     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5796     SDValue VL = DAG.getConstant(1, DL, XLenVT);
5797     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5798 
5799     // Unless the index is known to be 0, we must slide the vector down to get
5800     // the desired element into index 0.
5801     if (!isNullConstant(Idx)) {
5802       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
5803                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
5804     }
5805 
5806     // Extract the lower XLEN bits of the correct vector element.
5807     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
5808 
5809     // To extract the upper XLEN bits of the vector element, shift the first
5810     // element right by 32 bits and re-extract the lower XLEN bits.
5811     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
5812                                      DAG.getConstant(32, DL, XLenVT), VL);
5813     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
5814                                  ThirtyTwoV, Mask, VL);
5815 
5816     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
5817 
5818     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
5819     break;
5820   }
5821   case ISD::INTRINSIC_WO_CHAIN: {
5822     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5823     switch (IntNo) {
5824     default:
5825       llvm_unreachable(
5826           "Don't know how to custom type legalize this intrinsic!");
5827     case Intrinsic::riscv_orc_b: {
5828       // Lower to the GORCI encoding for orc.b with the operand extended.
5829       SDValue NewOp =
5830           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5831       // If Zbp is enabled, use GORCIW which will sign extend the result.
5832       unsigned Opc =
5833           Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
5834       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
5835                                 DAG.getConstant(7, DL, MVT::i64));
5836       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5837       return;
5838     }
5839     case Intrinsic::riscv_grev:
5840     case Intrinsic::riscv_gorc: {
5841       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5842              "Unexpected custom legalisation");
5843       SDValue NewOp1 =
5844           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5845       SDValue NewOp2 =
5846           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5847       unsigned Opc =
5848           IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
5849       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5850       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5851       break;
5852     }
5853     case Intrinsic::riscv_shfl:
5854     case Intrinsic::riscv_unshfl: {
5855       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5856              "Unexpected custom legalisation");
5857       SDValue NewOp1 =
5858           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5859       SDValue NewOp2 =
5860           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5861       unsigned Opc =
5862           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
5863       if (isa<ConstantSDNode>(N->getOperand(2))) {
5864         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
5865                              DAG.getConstant(0xf, DL, MVT::i64));
5866         Opc =
5867             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
5868       }
5869       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5870       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5871       break;
5872     }
5873     case Intrinsic::riscv_bcompress:
5874     case Intrinsic::riscv_bdecompress: {
5875       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5876              "Unexpected custom legalisation");
5877       SDValue NewOp1 =
5878           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5879       SDValue NewOp2 =
5880           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
5881       unsigned Opc = IntNo == Intrinsic::riscv_bcompress
5882                          ? RISCVISD::BCOMPRESSW
5883                          : RISCVISD::BDECOMPRESSW;
5884       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
5885       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
5886       break;
5887     }
5888     case Intrinsic::riscv_vmv_x_s: {
5889       EVT VT = N->getValueType(0);
5890       MVT XLenVT = Subtarget.getXLenVT();
5891       if (VT.bitsLT(XLenVT)) {
5892         // Simple case: just extract using vmv.x.s and truncate.
5893         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
5894                                       Subtarget.getXLenVT(), N->getOperand(1));
5895         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
5896         return;
5897       }
5898 
5899       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
5900              "Unexpected custom legalization");
5901 
5902       // We need to do the move in two steps.
5903       SDValue Vec = N->getOperand(1);
5904       MVT VecVT = Vec.getSimpleValueType();
5905 
5906       // First extract the lower XLEN bits of the element.
5907       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
5908 
5909       // To extract the upper XLEN bits of the vector element, shift the first
5910       // element right by 32 bits and re-extract the lower XLEN bits.
5911       SDValue VL = DAG.getConstant(1, DL, XLenVT);
5912       MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
5913       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
5914       SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
5915                                        DAG.getConstant(32, DL, XLenVT), VL);
5916       SDValue LShr32 =
5917           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
5918       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
5919 
5920       Results.push_back(
5921           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
5922       break;
5923     }
5924     }
5925     break;
5926   }
5927   case ISD::VECREDUCE_ADD:
5928   case ISD::VECREDUCE_AND:
5929   case ISD::VECREDUCE_OR:
5930   case ISD::VECREDUCE_XOR:
5931   case ISD::VECREDUCE_SMAX:
5932   case ISD::VECREDUCE_UMAX:
5933   case ISD::VECREDUCE_SMIN:
5934   case ISD::VECREDUCE_UMIN:
5935     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
5936       Results.push_back(V);
5937     break;
5938   case ISD::VP_REDUCE_ADD:
5939   case ISD::VP_REDUCE_AND:
5940   case ISD::VP_REDUCE_OR:
5941   case ISD::VP_REDUCE_XOR:
5942   case ISD::VP_REDUCE_SMAX:
5943   case ISD::VP_REDUCE_UMAX:
5944   case ISD::VP_REDUCE_SMIN:
5945   case ISD::VP_REDUCE_UMIN:
5946     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
5947       Results.push_back(V);
5948     break;
5949   case ISD::FLT_ROUNDS_: {
5950     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
5951     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
5952     Results.push_back(Res.getValue(0));
5953     Results.push_back(Res.getValue(1));
5954     break;
5955   }
5956   }
5957 }
5958 
5959 // A structure to hold one of the bit-manipulation patterns below. Together, a
5960 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
5961 //   (or (and (shl x, 1), 0xAAAAAAAA),
5962 //       (and (srl x, 1), 0x55555555))
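// In the example above both halves shift by 1: the SHL half matches with
// IsSHL == true and the SRL half with IsSHL == false, so the two form a pair
// via formsPairWith().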
5963 struct RISCVBitmanipPat {
5964   SDValue Op;
5965   unsigned ShAmt;
5966   bool IsSHL;
5967 
5968   bool formsPairWith(const RISCVBitmanipPat &Other) const {
5969     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
5970   }
5971 };
5972 
5973 // Matches patterns of the form
5974 //   (and (shl x, C2), (C1 << C2))
5975 //   (and (srl x, C2), C1)
5976 //   (shl (and x, C1), C2)
5977 //   (srl (and x, (C1 << C2)), C2)
5978 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
5979 // The expected masks for each shift amount are specified in BitmanipMasks where
5980 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
5981 // The maximum allowed shift amount is either XLen/2 or XLen/4, determined by
5982 // whether BitmanipMasks contains 6 or 5 entries respectively, assuming that
5983 // the maximum possible XLen is 64.
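// For example, with the GREVI masks used by matchGREVIPat below,
// (and (srl i32:x, 4), 0x0F0F0F0F) matches and yields
// {Op = x, ShAmt = 4, IsSHL = false}.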
5984 static Optional<RISCVBitmanipPat>
5985 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
5986   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
5987          "Unexpected number of masks");
5988   Optional<uint64_t> Mask;
5989   // Optionally consume a mask around the shift operation.
5990   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
5991     Mask = Op.getConstantOperandVal(1);
5992     Op = Op.getOperand(0);
5993   }
5994   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
5995     return None;
5996   bool IsSHL = Op.getOpcode() == ISD::SHL;
5997 
5998   if (!isa<ConstantSDNode>(Op.getOperand(1)))
5999     return None;
6000   uint64_t ShAmt = Op.getConstantOperandVal(1);
6001 
6002   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6003   if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
6004     return None;
6005   // If we don't have enough masks for 64 bits, then we must be trying to
6006   // match SHFL, so we're only allowed to shift by 1/4 of the width.
6007   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
6008     return None;
6009 
6010   SDValue Src = Op.getOperand(0);
6011 
6012   // The expected mask is shifted left when the AND is found around SHL
6013   // patterns.
6014   //   ((x >> 1) & 0x55555555)
6015   //   ((x << 1) & 0xAAAAAAAA)
6016   bool SHLExpMask = IsSHL;
6017 
6018   if (!Mask) {
6019     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
6020     // the mask is all ones: consume that now.
6021     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
6022       Mask = Src.getConstantOperandVal(1);
6023       Src = Src.getOperand(0);
6024       // The expected mask is now in fact shifted left for SRL, so reverse the
6025       // decision.
6026       //   ((x & 0xAAAAAAAA) >> 1)
6027       //   ((x & 0x55555555) << 1)
6028       SHLExpMask = !SHLExpMask;
6029     } else {
6030       // Use a default shifted mask of all-ones if there's no AND, truncated
6031       // down to the expected width. This simplifies the logic later on.
6032       Mask = maskTrailingOnes<uint64_t>(Width);
6033       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
6034     }
6035   }
6036 
6037   unsigned MaskIdx = Log2_32(ShAmt);
6038   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6039 
6040   if (SHLExpMask)
6041     ExpMask <<= ShAmt;
6042 
6043   if (Mask != ExpMask)
6044     return None;
6045 
6046   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
6047 }
6048 
6049 // Matches any of the following bit-manipulation patterns:
6050 //   (and (shl x, 1), (0x55555555 << 1))
6051 //   (and (srl x, 1), 0x55555555)
6052 //   (shl (and x, 0x55555555), 1)
6053 //   (srl (and x, (0x55555555 << 1)), 1)
6054 // where the shift amount and mask may vary thus:
6055 //   [1]  = 0x55555555 / 0xAAAAAAAA
6056 //   [2]  = 0x33333333 / 0xCCCCCCCC
6057 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
6058 //   [8]  = 0x00FF00FF / 0xFF00FF00
6059 //   [16] = 0x0000FFFF / 0xFFFF0000
6060 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
6061 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
6062   // These are the unshifted masks which we use to match bit-manipulation
6063   // patterns. They may be shifted left in certain circumstances.
6064   static const uint64_t BitmanipMasks[] = {
6065       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
6066       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
6067 
6068   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6069 }
6070 
6071 // Match the following pattern as a GREVI(W) operation
6072 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
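// For example, on i32,
//   (or (and (shl x, 4), 0xF0F0F0F0), (and (srl x, 4), 0x0F0F0F0F))
// becomes (GREV x, 4), i.e. a swap of the nibbles within each byte.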
6073 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
6074                                const RISCVSubtarget &Subtarget) {
6075   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
6076   EVT VT = Op.getValueType();
6077 
6078   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6079     auto LHS = matchGREVIPat(Op.getOperand(0));
6080     auto RHS = matchGREVIPat(Op.getOperand(1));
6081     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
6082       SDLoc DL(Op);
6083       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
6084                          DAG.getConstant(LHS->ShAmt, DL, VT));
6085     }
6086   }
6087   return SDValue();
6088 }
6089 
6090 // Matches any of the following patterns as a GORCI(W) operation:
6091 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
6092 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
6093 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
6094 // Note that with the following variant of 3.,
6095 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
6096 // the inner pattern will first be matched as GREVI and then the outer
6097 // pattern will be matched to GORC via the first rule above.
6098 // 4.  (or (rotl/rotr x, bitwidth/2), x)
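// For example, on i32, (or (rotl x, 16), x) becomes (GORC x, 16) via rule 4.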
6099 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
6100                                const RISCVSubtarget &Subtarget) {
6101   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
6102   EVT VT = Op.getValueType();
6103 
6104   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
6105     SDLoc DL(Op);
6106     SDValue Op0 = Op.getOperand(0);
6107     SDValue Op1 = Op.getOperand(1);
6108 
6109     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
6110       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
6111           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
6112           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
6113         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
6114       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
6115       if ((Reverse.getOpcode() == ISD::ROTL ||
6116            Reverse.getOpcode() == ISD::ROTR) &&
6117           Reverse.getOperand(0) == X &&
6118           isa<ConstantSDNode>(Reverse.getOperand(1))) {
6119         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
6120         if (RotAmt == (VT.getSizeInBits() / 2))
6121           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
6122                              DAG.getConstant(RotAmt, DL, VT));
6123       }
6124       return SDValue();
6125     };
6126 
6127     // Check for either commutable permutation of (or (GREVI x, shamt), x)
6128     if (SDValue V = MatchOROfReverse(Op0, Op1))
6129       return V;
6130     if (SDValue V = MatchOROfReverse(Op1, Op0))
6131       return V;
6132 
6133     // OR is commutable so canonicalize its OR operand to the left
6134     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
6135       std::swap(Op0, Op1);
6136     if (Op0.getOpcode() != ISD::OR)
6137       return SDValue();
6138     SDValue OrOp0 = Op0.getOperand(0);
6139     SDValue OrOp1 = Op0.getOperand(1);
6140     auto LHS = matchGREVIPat(OrOp0);
6141     // OR is commutable so swap the operands and try again: x might have been
6142     // on the left
6143     if (!LHS) {
6144       std::swap(OrOp0, OrOp1);
6145       LHS = matchGREVIPat(OrOp0);
6146     }
6147     auto RHS = matchGREVIPat(Op1);
6148     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
6149       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
6150                          DAG.getConstant(LHS->ShAmt, DL, VT));
6151     }
6152   }
6153   return SDValue();
6154 }
6155 
6156 // Matches any of the following bit-manipulation patterns:
6157 //   (and (shl x, 1), (0x22222222 << 1))
6158 //   (and (srl x, 1), 0x22222222)
6159 //   (shl (and x, 0x22222222), 1)
6160 //   (srl (and x, (0x22222222 << 1)), 1)
6161 // where the shift amount and mask may vary thus:
6162 //   [1]  = 0x22222222 / 0x44444444
6163 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
6164 //   [4]  = 0x00F000F0 / 0x0F000F00
6165 //   [8]  = 0x0000FF00 / 0x00FF0000
6166 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
6167 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
6168   // These are the unshifted masks which we use to match bit-manipulation
6169   // patterns. They may be shifted left in certain circumstances.
6170   static const uint64_t BitmanipMasks[] = {
6171       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
6172       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
6173 
6174   return matchRISCVBitmanipPat(Op, BitmanipMasks);
6175 }
6176 
6177 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
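// For example, on i32 with a shift amount of 8:
//   (or (or (and (shl x, 8), 0x00FF0000), (and (srl x, 8), 0x0000FF00)),
//       (and x, 0xFF0000FF))
// becomes (SHFL x, 8).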
6178 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
6179                                const RISCVSubtarget &Subtarget) {
6180   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
6181   EVT VT = Op.getValueType();
6182 
6183   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
6184     return SDValue();
6185 
6186   SDValue Op0 = Op.getOperand(0);
6187   SDValue Op1 = Op.getOperand(1);
6188 
6189   // Or is commutable so canonicalize the second OR to the LHS.
6190   if (Op0.getOpcode() != ISD::OR)
6191     std::swap(Op0, Op1);
6192   if (Op0.getOpcode() != ISD::OR)
6193     return SDValue();
6194 
6195   // We found an inner OR, so our operands are the operands of the inner OR
6196   // and the other operand of the outer OR.
6197   SDValue A = Op0.getOperand(0);
6198   SDValue B = Op0.getOperand(1);
6199   SDValue C = Op1;
6200 
6201   auto Match1 = matchSHFLPat(A);
6202   auto Match2 = matchSHFLPat(B);
6203 
6204   // If neither matched, we failed.
6205   if (!Match1 && !Match2)
6206     return SDValue();
6207 
6208   // We had at least one match. If one failed, try the remaining C operand.
6209   if (!Match1) {
6210     std::swap(A, C);
6211     Match1 = matchSHFLPat(A);
6212     if (!Match1)
6213       return SDValue();
6214   } else if (!Match2) {
6215     std::swap(B, C);
6216     Match2 = matchSHFLPat(B);
6217     if (!Match2)
6218       return SDValue();
6219   }
6220   assert(Match1 && Match2);
6221 
6222   // Make sure our matches pair up.
6223   if (!Match1->formsPairWith(*Match2))
6224     return SDValue();
6225 
6226   // All that remains is to make sure C is an AND with the same input that
6227   // masks out the bits that are being shuffled.
6228   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
6229       C.getOperand(0) != Match1->Op)
6230     return SDValue();
6231 
6232   uint64_t Mask = C.getConstantOperandVal(1);
6233 
6234   static const uint64_t BitmanipMasks[] = {
6235       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
6236       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
6237   };
6238 
6239   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
6240   unsigned MaskIdx = Log2_32(Match1->ShAmt);
6241   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
6242 
6243   if (Mask != ExpMask)
6244     return SDValue();
6245 
6246   SDLoc DL(Op);
6247   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
6248                      DAG.getConstant(Match1->ShAmt, DL, VT));
6249 }
6250 
6251 // Optimize (add (shl x, c0), (shl y, c1)) ->
6252 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
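// For example, (add (shl x, 5), (shl y, 7)) becomes
// (SLLI (SH2ADD y, x), 5) since c1-c0 == 2.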
6253 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
6254                                   const RISCVSubtarget &Subtarget) {
6255   // Perform this optimization only in the zba extension.
6256   if (!Subtarget.hasStdExtZba())
6257     return SDValue();
6258 
6259   // Skip for vector types and larger types.
6260   EVT VT = N->getValueType(0);
6261   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
6262     return SDValue();
6263 
6264   // The two operand nodes must be SHL and have no other use.
6265   SDValue N0 = N->getOperand(0);
6266   SDValue N1 = N->getOperand(1);
6267   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
6268       !N0->hasOneUse() || !N1->hasOneUse())
6269     return SDValue();
6270 
6271   // Check c0 and c1.
6272   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
6273   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
6274   if (!N0C || !N1C)
6275     return SDValue();
6276   int64_t C0 = N0C->getSExtValue();
6277   int64_t C1 = N1C->getSExtValue();
6278   if (C0 <= 0 || C1 <= 0)
6279     return SDValue();
6280 
6281   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
6282   int64_t Bits = std::min(C0, C1);
6283   int64_t Diff = std::abs(C0 - C1);
6284   if (Diff != 1 && Diff != 2 && Diff != 3)
6285     return SDValue();
6286 
6287   // Build nodes.
6288   SDLoc DL(N);
6289   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
6290   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
6291   SDValue NA0 =
6292       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
6293   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
6294   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
6295 }
6296 
6297 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
6298 // non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
6299 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
6300 // stage does not undo itself, but it is redundant.
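// For example, (GREVI (GREVI x, 1), 2) -> (GREVI x, 3),
// (GREVI (GREVI x, 2), 2) -> x, and (GORCI (GORCI x, 1), 3) -> (GORCI x, 3).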
6301 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
6302   SDValue Src = N->getOperand(0);
6303 
6304   if (Src.getOpcode() != N->getOpcode())
6305     return SDValue();
6306 
6307   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
6308       !isa<ConstantSDNode>(Src.getOperand(1)))
6309     return SDValue();
6310 
6311   unsigned ShAmt1 = N->getConstantOperandVal(1);
6312   unsigned ShAmt2 = Src.getConstantOperandVal(1);
6313   Src = Src.getOperand(0);
6314 
6315   unsigned CombinedShAmt;
6316   if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
6317     CombinedShAmt = ShAmt1 | ShAmt2;
6318   else
6319     CombinedShAmt = ShAmt1 ^ ShAmt2;
6320 
6321   if (CombinedShAmt == 0)
6322     return Src;
6323 
6324   SDLoc DL(N);
6325   return DAG.getNode(
6326       N->getOpcode(), DL, N->getValueType(0), Src,
6327       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
6328 }
6329 
6330 // Combine a constant select operand into its use:
6331 //
6332 // (and (select cond, -1, c), x)
6333 //   -> (select cond, x, (and x, c))  [AllOnes=1]
6334 // (or  (select cond, 0, c), x)
6335 //   -> (select cond, x, (or x, c))  [AllOnes=0]
6336 // (xor (select cond, 0, c), x)
6337 //   -> (select cond, x, (xor x, c))  [AllOnes=0]
6338 // (add (select cond, 0, c), x)
6339 //   -> (select cond, x, (add x, c))  [AllOnes=0]
6340 // (sub x, (select cond, 0, c))
6341 //   -> (select cond, x, (sub x, c))  [AllOnes=0]
6342 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
6343                                    SelectionDAG &DAG, bool AllOnes) {
6344   EVT VT = N->getValueType(0);
6345 
6346   // Skip vectors.
6347   if (VT.isVector())
6348     return SDValue();
6349 
6350   if ((Slct.getOpcode() != ISD::SELECT &&
6351        Slct.getOpcode() != RISCVISD::SELECT_CC) ||
6352       !Slct.hasOneUse())
6353     return SDValue();
6354 
6355   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
6356     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
6357   };
6358 
6359   bool SwapSelectOps;
6360   unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
6361   SDValue TrueVal = Slct.getOperand(1 + OpOffset);
6362   SDValue FalseVal = Slct.getOperand(2 + OpOffset);
6363   SDValue NonConstantVal;
6364   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
6365     SwapSelectOps = false;
6366     NonConstantVal = FalseVal;
6367   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
6368     SwapSelectOps = true;
6369     NonConstantVal = TrueVal;
6370   } else
6371     return SDValue();
6372 
6373   // Slct is now known to be the desired identity constant when CC is true.
6374   TrueVal = OtherOp;
6375   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
6376   // Unless SwapSelectOps says the condition should be false.
6377   if (SwapSelectOps)
6378     std::swap(TrueVal, FalseVal);
6379 
6380   if (Slct.getOpcode() == RISCVISD::SELECT_CC)
6381     return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
6382                        {Slct.getOperand(0), Slct.getOperand(1),
6383                         Slct.getOperand(2), TrueVal, FalseVal});
6384 
6385   return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
6386                      {Slct.getOperand(0), TrueVal, FalseVal});
6387 }
6388 
6389 // Attempt combineSelectAndUse on each operand of a commutative operator N.
6390 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
6391                                               bool AllOnes) {
6392   SDValue N0 = N->getOperand(0);
6393   SDValue N1 = N->getOperand(1);
6394   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
6395     return Result;
6396   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
6397     return Result;
6398   return SDValue();
6399 }
6400 
6401 // Transform (add (mul x, c0), c1) ->
6402 //           (add (mul (add x, c1/c0), c0), c1%c0).
6403 // if c1/c0 and c1%c0 are simm12, while c1 is not.
6404 // Or transform (add (mul x, c0), c1) ->
6405 //              (mul (add x, c1/c0), c0).
6406 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
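// For example, with c0 = 100 and c1 = 4099 (not a simm12): c1/c0 = 40 and
// c1%c0 = 99, so (add (mul x, 100), 4099) becomes
// (add (mul (add x, 40), 100), 99).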
6407 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
6408                                      const RISCVSubtarget &Subtarget) {
6409   // Skip for vector types and larger types.
6410   EVT VT = N->getValueType(0);
6411   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
6412     return SDValue();
6413   // The first operand node must be a MUL and have no other use.
6414   SDValue N0 = N->getOperand(0);
6415   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
6416     return SDValue();
6417   // Check if c0 and c1 match above conditions.
6418   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
6419   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
6420   if (!N0C || !N1C)
6421     return SDValue();
6422   int64_t C0 = N0C->getSExtValue();
6423   int64_t C1 = N1C->getSExtValue();
6424   if (C0 == -1 || C0 == 0 || C0 == 1 || (C1 / C0) == 0 || isInt<12>(C1) ||
6425       !isInt<12>(C1 % C0) || !isInt<12>(C1 / C0))
6426     return SDValue();
6427   // If C0 * (C1 / C0) is a 12-bit integer, this transform will be reversed.
6428   if (isInt<12>(C0 * (C1 / C0)))
6429     return SDValue();
6430   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
6431   SDLoc DL(N);
6432   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
6433                              DAG.getConstant(C1 / C0, DL, VT));
6434   SDValue New1 =
6435       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
6436   if ((C1 % C0) == 0)
6437     return New1;
6438   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(C1 % C0, DL, VT));
6439 }
6440 
6441 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
6442                                  const RISCVSubtarget &Subtarget) {
6443   // Transform (add (mul x, c0), c1) ->
6444   //           (add (mul (add x, c1/c0), c0), c1%c0).
6445   // if c1/c0 and c1%c0 are simm12, while c1 is not.
6446   // Or transform (add (mul x, c0), c1) ->
6447   //              (mul (add x, c1/c0), c0).
6448   // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
6449   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
6450     return V;
6451   // Fold (add (shl x, c0), (shl y, c1)) ->
6452   //      (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
6453   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
6454     return V;
6455   // fold (add (select lhs, rhs, cc, 0, y), x) ->
6456   //      (select lhs, rhs, cc, x, (add x, y))
6457   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
6458 }
6459 
6460 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
6461   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
6462   //      (select lhs, rhs, cc, x, (sub x, y))
6463   SDValue N0 = N->getOperand(0);
6464   SDValue N1 = N->getOperand(1);
6465   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
6466 }
6467 
6468 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
6469   // fold (and (select lhs, rhs, cc, -1, y), x) ->
6470   //      (select lhs, rhs, cc, x, (and x, y))
6471   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
6472 }
6473 
6474 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
6475                                 const RISCVSubtarget &Subtarget) {
6476   if (Subtarget.hasStdExtZbp()) {
6477     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
6478       return GREV;
6479     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
6480       return GORC;
6481     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
6482       return SHFL;
6483   }
6484 
6485   // fold (or (select cond, 0, y), x) ->
6486   //      (select cond, x, (or x, y))
6487   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
6488 }
6489 
6490 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
6491   // fold (xor (select cond, 0, y), x) ->
6492   //      (select cond, x, (xor x, y))
6493   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
6494 }
6495 
6496 // Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
6497 // has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
6498 // by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
6499 // removed during type legalization leaving an ADD/SUB/MUL use that won't use
6500 // ADDW/SUBW/MULW.
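// For example, if (any_extend (add X, Y)) feeds a CopyToReg and the i32 add
// also feeds an i32 setcc, sign extending instead lets the add later select
// to ADDW, and the setcc is promoted to compare the sign-extended values.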
6501 static SDValue performANY_EXTENDCombine(SDNode *N,
6502                                         TargetLowering::DAGCombinerInfo &DCI,
6503                                         const RISCVSubtarget &Subtarget) {
6504   if (!Subtarget.is64Bit())
6505     return SDValue();
6506 
6507   SelectionDAG &DAG = DCI.DAG;
6508 
6509   SDValue Src = N->getOperand(0);
6510   EVT VT = N->getValueType(0);
6511   if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
6512     return SDValue();
6513 
6514   // The opcode must be one that can implicitly sign_extend.
6515   // FIXME: Additional opcodes.
6516   switch (Src.getOpcode()) {
6517   default:
6518     return SDValue();
6519   case ISD::MUL:
6520     if (!Subtarget.hasStdExtM())
6521       return SDValue();
6522     LLVM_FALLTHROUGH;
6523   case ISD::ADD:
6524   case ISD::SUB:
6525     break;
6526   }
6527 
6528   // Only handle cases where the result is used by a CopyToReg. That likely
6529   // means the value is a liveout of the basic block. This helps prevent
6530   // infinite combine loops like PR51206.
6531   if (none_of(N->uses(),
6532               [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
6533     return SDValue();
6534 
6535   SmallVector<SDNode *, 4> SetCCs;
6536   for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
6537                             UE = Src.getNode()->use_end();
6538        UI != UE; ++UI) {
6539     SDNode *User = *UI;
6540     if (User == N)
6541       continue;
6542     if (UI.getUse().getResNo() != Src.getResNo())
6543       continue;
6544     // All i32 setccs are legalized by sign extending operands.
6545     if (User->getOpcode() == ISD::SETCC) {
6546       SetCCs.push_back(User);
6547       continue;
6548     }
6549     // We don't know if we can extend this user.
6550     break;
6551   }
6552 
6553   // If we don't have any SetCCs, this isn't worthwhile.
6554   if (SetCCs.empty())
6555     return SDValue();
6556 
6557   SDLoc DL(N);
6558   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
6559   DCI.CombineTo(N, SExt);
6560 
6561   // Promote all the setccs.
6562   for (SDNode *SetCC : SetCCs) {
6563     SmallVector<SDValue, 4> Ops;
6564 
6565     for (unsigned j = 0; j != 2; ++j) {
6566       SDValue SOp = SetCC->getOperand(j);
6567       if (SOp == Src)
6568         Ops.push_back(SExt);
6569       else
6570         Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
6571     }
6572 
6573     Ops.push_back(SetCC->getOperand(2));
6574     DCI.CombineTo(SetCC,
6575                   DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6576   }
6577   return SDValue(N, 0);
6578 }
6579 
6580 // Try to form VWMUL or VWMULU.
6581 // FIXME: Support VWMULSU.
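// For example, (mul_vl (vsext_vl x, m, vl), (vsext_vl y, m, vl), m, vl) can
// become (vwmul_vl x, y, m, vl) when x and y are half the result element
// width (re-extending to the half-width type if necessary); the vzext_vl
// form uses vwmulu_vl.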
6582 static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
6583                                     SelectionDAG &DAG) {
6584   assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
6585   bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
6586   bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
6587   if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
6588     return SDValue();
6589 
6590   SDValue Mask = N->getOperand(2);
6591   SDValue VL = N->getOperand(3);
6592 
6593   // Make sure the mask and VL match.
6594   if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
6595     return SDValue();
6596 
6597   MVT VT = N->getSimpleValueType(0);
6598 
6599   // Determine the narrow size for a widening multiply.
6600   unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
6601   MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
6602                                   VT.getVectorElementCount());
6603 
6604   SDLoc DL(N);
6605 
6606   // See if the other operand is the same opcode.
6607   if (Op0.getOpcode() == Op1.getOpcode()) {
6608     if (!Op1.hasOneUse())
6609       return SDValue();
6610 
6611     // Make sure the mask and VL match.
6612     if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
6613       return SDValue();
6614 
6615     Op1 = Op1.getOperand(0);
6616   } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
6617     // The operand is a splat of a scalar.
6618 
6619     // The VL must be the same.
6620     if (Op1.getOperand(1) != VL)
6621       return SDValue();
6622 
6623     // Get the scalar value.
6624     Op1 = Op1.getOperand(0);
6625 
6626     // See if we have enough sign bits or zero bits in the scalar to use a
6627     // widening multiply by splatting to smaller element size.
6628     unsigned EltBits = VT.getScalarSizeInBits();
6629     unsigned ScalarBits = Op1.getValueSizeInBits();
6630     // Make sure we're getting all element bits from the scalar register.
6631     // FIXME: Support implicit sign extension of vmv.v.x?
6632     if (ScalarBits < EltBits)
6633       return SDValue();
6634 
6635     if (IsSignExt) {
6636       if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
6637         return SDValue();
6638     } else {
6639       APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
6640       if (!DAG.MaskedValueIsZero(Op1, Mask))
6641         return SDValue();
6642     }
6643 
6644     Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
6645   } else
6646     return SDValue();
6647 
6648   Op0 = Op0.getOperand(0);
6649 
6650   // Re-introduce narrower extends if needed.
6651   unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
6652   if (Op0.getValueType() != NarrowVT)
6653     Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
6654   if (Op1.getValueType() != NarrowVT)
6655     Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
6656 
6657   unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
6658   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
6659 }
6660 
6661 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
6662                                                DAGCombinerInfo &DCI) const {
6663   SelectionDAG &DAG = DCI.DAG;
6664 
6665   // Helper to call SimplifyDemandedBits on an operand of N where only some low
6666   // bits are demanded. N will be added to the Worklist if it was not deleted.
6667   // Caller should return SDValue(N, 0) if this returns true.
6668   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
6669     SDValue Op = N->getOperand(OpNo);
6670     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
6671     if (!SimplifyDemandedBits(Op, Mask, DCI))
6672       return false;
6673 
6674     if (N->getOpcode() != ISD::DELETED_NODE)
6675       DCI.AddToWorklist(N);
6676     return true;
6677   };
6678 
6679   switch (N->getOpcode()) {
6680   default:
6681     break;
6682   case RISCVISD::SplitF64: {
6683     SDValue Op0 = N->getOperand(0);
6684     // If the input to SplitF64 is just BuildPairF64 then the operation is
6685     // redundant. Instead, use BuildPairF64's operands directly.
6686     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
6687       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6688 
6689     SDLoc DL(N);
6690 
6691     // It's cheaper to materialise two 32-bit integers than to load a double
6692     // from the constant pool and transfer it to integer registers through the
6693     // stack.
6694     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6695       APInt V = C->getValueAPF().bitcastToAPInt();
6696       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6697       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6698       return DCI.CombineTo(N, Lo, Hi);
6699     }
6700 
6701     // This is a target-specific version of a DAGCombine performed in
6702     // DAGCombiner::visitBITCAST. It performs the equivalent of:
6703     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
6704     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
6705     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
6706         !Op0.getNode()->hasOneUse())
6707       break;
6708     SDValue NewSplitF64 =
6709         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
6710                     Op0.getOperand(0));
6711     SDValue Lo = NewSplitF64.getValue(0);
6712     SDValue Hi = NewSplitF64.getValue(1);
6713     APInt SignBit = APInt::getSignMask(32);
6714     if (Op0.getOpcode() == ISD::FNEG) {
6715       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
6716                                   DAG.getConstant(SignBit, DL, MVT::i32));
6717       return DCI.CombineTo(N, Lo, NewHi);
6718     }
6719     assert(Op0.getOpcode() == ISD::FABS);
6720     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
6721                                 DAG.getConstant(~SignBit, DL, MVT::i32));
6722     return DCI.CombineTo(N, Lo, NewHi);
6723   }
6724   case RISCVISD::SLLW:
6725   case RISCVISD::SRAW:
6726   case RISCVISD::SRLW:
6727   case RISCVISD::ROLW:
6728   case RISCVISD::RORW: {
6729     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
6730     if (SimplifyDemandedLowBitsHelper(0, 32) ||
6731         SimplifyDemandedLowBitsHelper(1, 5))
6732       return SDValue(N, 0);
6733     break;
6734   }
6735   case RISCVISD::CLZW:
6736   case RISCVISD::CTZW: {
6737     // Only the lower 32 bits of the first operand are read.
6738     if (SimplifyDemandedLowBitsHelper(0, 32))
6739       return SDValue(N, 0);
6740     break;
6741   }
6742   case RISCVISD::FSL:
6743   case RISCVISD::FSR: {
6744     // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
6745     unsigned BitWidth = N->getOperand(2).getValueSizeInBits();
6746     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
6747     if (SimplifyDemandedLowBitsHelper(2, Log2_32(BitWidth) + 1))
6748       return SDValue(N, 0);
6749     break;
6750   }
6751   case RISCVISD::FSLW:
6752   case RISCVISD::FSRW: {
6753     // Only the lower 32 bits of the values and lower 6 bits of the shift
6754     // amount are read.
6755     if (SimplifyDemandedLowBitsHelper(0, 32) ||
6756         SimplifyDemandedLowBitsHelper(1, 32) ||
6757         SimplifyDemandedLowBitsHelper(2, 6))
6758       return SDValue(N, 0);
6759     break;
6760   }
6761   case RISCVISD::GREV:
6762   case RISCVISD::GORC: {
6763     // Only the lower log2(Bitwidth) bits of the shift amount are read.
6764     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
6765     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
6766     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
6767       return SDValue(N, 0);
6768 
6769     return combineGREVI_GORCI(N, DCI.DAG);
6770   }
6771   case RISCVISD::GREVW:
6772   case RISCVISD::GORCW: {
6773     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
6774     if (SimplifyDemandedLowBitsHelper(0, 32) ||
6775         SimplifyDemandedLowBitsHelper(1, 5))
6776       return SDValue(N, 0);
6777 
6778     return combineGREVI_GORCI(N, DCI.DAG);
6779   }
6780   case RISCVISD::SHFL:
6781   case RISCVISD::UNSHFL: {
6782     // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
6783     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
6784     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
6785     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
6786       return SDValue(N, 0);
6787 
6788     break;
6789   }
6790   case RISCVISD::SHFLW:
6791   case RISCVISD::UNSHFLW: {
6792     // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
6797     if (SimplifyDemandedLowBitsHelper(0, 32) ||
6798         SimplifyDemandedLowBitsHelper(1, 4))
6799       return SDValue(N, 0);
6800 
6801     break;
6802   }
6803   case RISCVISD::BCOMPRESSW:
6804   case RISCVISD::BDECOMPRESSW: {
6805     // Only the lower 32 bits of LHS and RHS are read.
6806     if (SimplifyDemandedLowBitsHelper(0, 32) ||
6807         SimplifyDemandedLowBitsHelper(1, 32))
6808       return SDValue(N, 0);
6809 
6810     break;
6811   }
6812   case RISCVISD::FMV_X_ANYEXTH:
6813   case RISCVISD::FMV_X_ANYEXTW_RV64: {
6814     SDLoc DL(N);
6815     SDValue Op0 = N->getOperand(0);
6816     MVT VT = N->getSimpleValueType(0);
6817     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
6818     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
6819     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
6820     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
6821          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
6822         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
6823          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
6824       assert(Op0.getOperand(0).getValueType() == VT &&
6825              "Unexpected value type!");
6826       return Op0.getOperand(0);
6827     }
6828 
6829     // This is a target-specific version of a DAGCombine performed in
6830     // DAGCombiner::visitBITCAST. It performs the equivalent of:
6831     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
6832     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
6833     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
6834         !Op0.getNode()->hasOneUse())
6835       break;
6836     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
6837     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
6838     APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
6839     if (Op0.getOpcode() == ISD::FNEG)
6840       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
6841                          DAG.getConstant(SignBit, DL, VT));
6842 
6843     assert(Op0.getOpcode() == ISD::FABS);
6844     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
6845                        DAG.getConstant(~SignBit, DL, VT));
6846   }
6847   case ISD::ADD:
6848     return performADDCombine(N, DAG, Subtarget);
6849   case ISD::SUB:
6850     return performSUBCombine(N, DAG);
6851   case ISD::AND:
6852     return performANDCombine(N, DAG);
6853   case ISD::OR:
6854     return performORCombine(N, DAG, Subtarget);
6855   case ISD::XOR:
6856     return performXORCombine(N, DAG);
6857   case ISD::ANY_EXTEND:
6858     return performANY_EXTENDCombine(N, DCI, Subtarget);
6859   case ISD::ZERO_EXTEND:
6860     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
6861     // type legalization. This is safe because fp_to_uint produces poison if
6862     // it overflows.
6863     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() &&
6864         N->getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
6865         isTypeLegal(N->getOperand(0).getOperand(0).getValueType()))
6866       return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
6867                          N->getOperand(0).getOperand(0));
6868     return SDValue();
6869   case RISCVISD::SELECT_CC: {
6870     // Try to simplify this select_cc where possible.
6871     SDValue LHS = N->getOperand(0);
6872     SDValue RHS = N->getOperand(1);
6873     SDValue TrueV = N->getOperand(3);
6874     SDValue FalseV = N->getOperand(4);
6875 
6876     // If the True and False values are the same, we don't need a select_cc.
6877     if (TrueV == FalseV)
6878       return TrueV;
6879 
6880     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
6881     if (!ISD::isIntEqualitySetCC(CCVal))
6882       break;
6883 
6884     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
6885     //      (select_cc X, Y, lt, trueV, falseV)
6886     // Sometimes the setcc is introduced after select_cc has been formed.
6887     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6888         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
6889       // If we're looking for eq 0 instead of ne 0, we need to invert the
6890       // condition.
6891       bool Invert = CCVal == ISD::SETEQ;
6892       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6893       if (Invert)
6894         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6895 
6896       SDLoc DL(N);
6897       RHS = LHS.getOperand(1);
6898       LHS = LHS.getOperand(0);
6899       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6900 
6901       SDValue TargetCC = DAG.getCondCode(CCVal);
6902       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
6903                          {LHS, RHS, TargetCC, TrueV, FalseV});
6904     }
6905 
6906     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
6907     //      (select_cc X, Y, eq/ne, trueV, falseV)
6908     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
6909       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
6910                          {LHS.getOperand(0), LHS.getOperand(1),
6911                           N->getOperand(2), TrueV, FalseV});
6912     // (select_cc X, 1, setne, trueV, falseV) ->
6913     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
6914     // This can occur when legalizing some floating point comparisons.
6915     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6916     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6917       SDLoc DL(N);
6918       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6919       SDValue TargetCC = DAG.getCondCode(CCVal);
6920       RHS = DAG.getConstant(0, DL, LHS.getValueType());
6921       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
6922                          {LHS, RHS, TargetCC, TrueV, FalseV});
6923     }
6924 
6925     break;
6926   }
6927   case RISCVISD::BR_CC: {
6928     SDValue LHS = N->getOperand(1);
6929     SDValue RHS = N->getOperand(2);
6930     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
6931     if (!ISD::isIntEqualitySetCC(CCVal))
6932       break;
6933 
6934     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
6935     //      (br_cc X, Y, lt, dest)
6936     // Sometimes the setcc is introduced after br_cc has been formed.
6937     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6938         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
6939       // If we're looking for eq 0 instead of ne 0, we need to invert the
6940       // condition.
6941       bool Invert = CCVal == ISD::SETEQ;
6942       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6943       if (Invert)
6944         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6945 
6946       SDLoc DL(N);
6947       RHS = LHS.getOperand(1);
6948       LHS = LHS.getOperand(0);
6949       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6950 
6951       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
6952                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
6953                          N->getOperand(4));
6954     }
6955 
6956     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
6957     //      (br_cc X, Y, eq/ne, dest)
6958     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
6959       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
6960                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
6961                          N->getOperand(3), N->getOperand(4));
6962 
6963     // (br_cc X, 1, setne, dest) ->
6964     // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
6965     // This can occur when legalizing some floating point comparisons.
6966     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6967     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6968       SDLoc DL(N);
6969       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6970       SDValue TargetCC = DAG.getCondCode(CCVal);
6971       RHS = DAG.getConstant(0, DL, LHS.getValueType());
6972       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
6973                          N->getOperand(0), LHS, RHS, TargetCC,
6974                          N->getOperand(4));
6975     }
6976     break;
6977   }
6978   case ISD::FCOPYSIGN: {
6979     EVT VT = N->getValueType(0);
6980     if (!VT.isVector())
6981       break;
6982     // There is a form of VFSGNJ which injects the negated sign of its second
6983     // operand. Try and bubble any FNEG up after the extend/round to produce
6984     // this optimized pattern. Avoid modifying cases where the FP_ROUND has
6985     // TRUNC=1.
6986     SDValue In2 = N->getOperand(1);
6987     // Avoid cases where the extend/round has multiple uses, as duplicating
6988     // those is typically more expensive than removing a fneg.
6989     if (!In2.hasOneUse())
6990       break;
6991     if (In2.getOpcode() != ISD::FP_EXTEND &&
6992         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
6993       break;
6994     In2 = In2.getOperand(0);
6995     if (In2.getOpcode() != ISD::FNEG)
6996       break;
6997     SDLoc DL(N);
6998     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
6999     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
7000                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
7001   }
7002   case ISD::MGATHER:
7003   case ISD::MSCATTER:
7004   case ISD::VP_GATHER:
7005   case ISD::VP_SCATTER: {
7006     if (!DCI.isBeforeLegalize())
7007       break;
7008     SDValue Index, ScaleOp;
7009     bool IsIndexScaled = false;
7010     bool IsIndexSigned = false;
7011     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
7012       Index = VPGSN->getIndex();
7013       ScaleOp = VPGSN->getScale();
7014       IsIndexScaled = VPGSN->isIndexScaled();
7015       IsIndexSigned = VPGSN->isIndexSigned();
7016     } else {
7017       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
7018       Index = MGSN->getIndex();
7019       ScaleOp = MGSN->getScale();
7020       IsIndexScaled = MGSN->isIndexScaled();
7021       IsIndexSigned = MGSN->isIndexSigned();
7022     }
7023     EVT IndexVT = Index.getValueType();
7024     MVT XLenVT = Subtarget.getXLenVT();
7025     // RISCV indexed loads and stores only support the "unsigned unscaled"
7026     // addressing mode, so anything else must be manually legalized.
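    // For example, a gather with signed i16 indices and a scale of 4 has its
    // indices sign extended to XLenVT and shifted left by 2, so the memory
    // operation itself can use unsigned unscaled indexing.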
7027     bool NeedsIdxLegalization =
7028         IsIndexScaled ||
7029         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
7030     if (!NeedsIdxLegalization)
7031       break;
7032 
7033     SDLoc DL(N);
7034 
7035     // Any index legalization should first promote to XLenVT, so we don't lose
7036     // bits when scaling. This may create an illegal index type so we let
7037     // LLVM's legalization take care of the splitting.
7038     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
7039     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
7040       IndexVT = IndexVT.changeVectorElementType(XLenVT);
7041       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
7042                           DL, IndexVT, Index);
7043     }
7044 
7045     unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
7046     if (IsIndexScaled && Scale != 1) {
7047       // Manually scale the indices by the element size.
7048       // TODO: Sanitize the scale operand here?
7049       // TODO: For VP nodes, should we use VP_SHL here?
7050       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
7051       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
7052       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
7053     }
7054 
7055     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
7056     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
7057       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
7058                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
7059                               VPGN->getScale(), VPGN->getMask(),
7060                               VPGN->getVectorLength()},
7061                              VPGN->getMemOperand(), NewIndexTy);
7062     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
7063       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
7064                               {VPSN->getChain(), VPSN->getValue(),
7065                                VPSN->getBasePtr(), Index, VPSN->getScale(),
7066                                VPSN->getMask(), VPSN->getVectorLength()},
7067                               VPSN->getMemOperand(), NewIndexTy);
7068     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
7069       return DAG.getMaskedGather(
7070           N->getVTList(), MGN->getMemoryVT(), DL,
7071           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
7072            MGN->getBasePtr(), Index, MGN->getScale()},
7073           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
7074     const auto *MSN = cast<MaskedScatterSDNode>(N);
7075     return DAG.getMaskedScatter(
7076         N->getVTList(), MSN->getMemoryVT(), DL,
7077         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
7078          Index, MSN->getScale()},
7079         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
7080   }
7081   case RISCVISD::SRA_VL:
7082   case RISCVISD::SRL_VL:
7083   case RISCVISD::SHL_VL: {
7084     SDValue ShAmt = N->getOperand(1);
7085     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7086       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7087       SDLoc DL(N);
7088       SDValue VL = N->getOperand(3);
7089       EVT VT = N->getValueType(0);
7090       ShAmt =
7091           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
7092       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
7093                          N->getOperand(2), N->getOperand(3));
7094     }
7095     break;
7096   }
7097   case ISD::SRA:
7098   case ISD::SRL:
7099   case ISD::SHL: {
7100     SDValue ShAmt = N->getOperand(1);
7101     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
7102       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
7103       SDLoc DL(N);
7104       EVT VT = N->getValueType(0);
7105       ShAmt =
7106           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
7107       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
7108     }
7109     break;
7110   }
7111   case RISCVISD::MUL_VL: {
7112     SDValue Op0 = N->getOperand(0);
7113     SDValue Op1 = N->getOperand(1);
7114     if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
7115       return V;
7116     if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
7117       return V;
7118     return SDValue();
7119   }
7120   case ISD::STORE: {
7121     auto *Store = cast<StoreSDNode>(N);
7122     SDValue Val = Store->getValue();
7123     // Combine store of vmv.x.s to vse with VL of 1.
7124     // FIXME: Support FP.
7125     if (Val.getOpcode() == RISCVISD::VMV_X_S) {
7126       SDValue Src = Val.getOperand(0);
7127       EVT VecVT = Src.getValueType();
7128       EVT MemVT = Store->getMemoryVT();
7129       // The memory VT and the element type must match.
7130       if (VecVT.getVectorElementType() == MemVT) {
7131         SDLoc DL(N);
7132         MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
7133         return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(),
7134                               DAG.getConstant(1, DL, MaskVT),
7135                               DAG.getConstant(1, DL, Subtarget.getXLenVT()),
7136                               Store->getPointerInfo(),
7137                               Store->getOriginalAlign(),
7138                               Store->getMemOperand()->getFlags());
7139       }
7140     }
7141 
7142     break;
7143   }
7144   }
7145 
7146   return SDValue();
7147 }
7148 
7149 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
7150     const SDNode *N, CombineLevel Level) const {
7151   // The following folds are only desirable if `(OP _, c1 << c2)` can be
7152   // materialised in fewer instructions than `(OP _, c1)`:
7153   //
7154   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7155   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
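  // For example, shifting c1 = 2047 left by 4 gives 32752, which no longer
  // fits an ADDI immediate, so that combine is blocked; shifting c1 = 2048
  // (two instructions to materialise) left by 1 gives 4096 (a single LUI),
  // so that combine is allowed.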
7156   SDValue N0 = N->getOperand(0);
7157   EVT Ty = N0.getValueType();
7158   if (Ty.isScalarInteger() &&
7159       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
7160     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7161     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
7162     if (C1 && C2) {
7163       const APInt &C1Int = C1->getAPIntValue();
7164       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
7165 
7166       // We can materialise `c1 << c2` into an add immediate, so it's "free",
7167       // and the combine should happen, to potentially allow further combines
7168       // later.
7169       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
7170           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
7171         return true;
7172 
7173       // We can materialise `c1` in an add immediate, so it's "free", and the
7174       // combine should be prevented.
7175       if (C1Int.getMinSignedBits() <= 64 &&
7176           isLegalAddImmediate(C1Int.getSExtValue()))
7177         return false;
7178 
7179       // Neither constant will fit into an immediate, so find materialisation
7180       // costs.
7181       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
7182                                               Subtarget.getFeatureBits(),
7183                                               /*CompressionCost*/true);
7184       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
7185           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
7186           /*CompressionCost*/true);
7187 
7188       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
7189       // combine should be prevented.
7190       if (C1Cost < ShiftedC1Cost)
7191         return false;
7192     }
7193   }
7194   return true;
7195 }
7196 
7197 bool RISCVTargetLowering::targetShrinkDemandedConstant(
7198     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
7199     TargetLoweringOpt &TLO) const {
7200   // Delay this optimization as late as possible.
7201   if (!TLO.LegalOps)
7202     return false;
7203 
7204   EVT VT = Op.getValueType();
7205   if (VT.isVector())
7206     return false;
7207 
7208   // Only handle AND for now.
7209   if (Op.getOpcode() != ISD::AND)
7210     return false;
7211 
7212   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7213   if (!C)
7214     return false;
7215 
7216   const APInt &Mask = C->getAPIntValue();
7217 
7218   // Clear all non-demanded bits initially.
7219   APInt ShrunkMask = Mask & DemandedBits;
7220 
7221   // Try to make a smaller immediate by setting undemanded bits.
7222 
7223   APInt ExpandedMask = Mask | ~DemandedBits;
7224 
7225   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
7226     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
7227   };
7228   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
7229     if (NewMask == Mask)
7230       return true;
7231     SDLoc DL(Op);
7232     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
7233     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
7234     return TLO.CombineTo(Op, NewOp);
7235   };
7236 
7237   // If the shrunk mask fits in sign extended 12 bits, let the target
7238   // independent code apply it.
7239   if (ShrunkMask.isSignedIntN(12))
7240     return false;
7241 
7242   // Preserve (and X, 0xffff) when zext.h is supported.
7243   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
7244     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
7245     if (IsLegalMask(NewMask))
7246       return UseMask(NewMask);
7247   }
7248 
7249   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
7250   if (VT == MVT::i64) {
7251     APInt NewMask = APInt(64, 0xffffffff);
7252     if (IsLegalMask(NewMask))
7253       return UseMask(NewMask);
7254   }
7255 
7256   // For the remaining optimizations, we need to be able to make a negative
7257   // number through a combination of mask and undemanded bits.
7258   if (!ExpandedMask.isNegative())
7259     return false;
7260 
7261   // Find the fewest number of bits needed to represent the negative number.
7262   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
7263 
7264   // Try to make a 12 bit negative immediate. If that fails try to make a 32
7265   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
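  // For example, on RV64, if only the low 12 bits of (and X, 0x0ff0) are
  // demanded, the mask can be widened to 0xfffffffffffffff0 (-16), which fits
  // in an ANDI immediate.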
7266   APInt NewMask = ShrunkMask;
7267   if (MinSignedBits <= 12)
7268     NewMask.setBitsFrom(11);
7269   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
7270     NewMask.setBitsFrom(31);
7271   else
7272     return false;
7273 
7274   // Sanity check that our new mask is a subset of the demanded mask.
7275   assert(IsLegalMask(NewMask));
7276   return UseMask(NewMask);
7277 }
7278 
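// Apply the generalized bit reverse (GREV) permutation described by ShAmt to
// Src: each set bit k of ShAmt swaps adjacent blocks of 2^k bits. For example,
// ShAmt == 7 reverses the bits within each byte and ShAmt == BitWidth-1
// reverses all bits.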
7279 static void computeGREV(APInt &Src, unsigned ShAmt) {
7280   ShAmt &= Src.getBitWidth() - 1;
7281   uint64_t x = Src.getZExtValue();
7282   if (ShAmt & 1)
7283     x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
7284   if (ShAmt & 2)
7285     x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
7286   if (ShAmt & 4)
7287     x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
7288   if (ShAmt & 8)
7289     x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
7290   if (ShAmt & 16)
7291     x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
7292   if (ShAmt & 32)
7293     x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
7294   Src = x;
7295 }
7296 
7297 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7298                                                         KnownBits &Known,
7299                                                         const APInt &DemandedElts,
7300                                                         const SelectionDAG &DAG,
7301                                                         unsigned Depth) const {
7302   unsigned BitWidth = Known.getBitWidth();
7303   unsigned Opc = Op.getOpcode();
7304   assert((Opc >= ISD::BUILTIN_OP_END ||
7305           Opc == ISD::INTRINSIC_WO_CHAIN ||
7306           Opc == ISD::INTRINSIC_W_CHAIN ||
7307           Opc == ISD::INTRINSIC_VOID) &&
7308          "Should use MaskedValueIsZero if you don't know whether Op"
7309          " is a target node!");
7310 
7311   Known.resetAll();
7312   switch (Opc) {
7313   default: break;
7314   case RISCVISD::SELECT_CC: {
7315     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
7316     // If we don't know any bits, early out.
7317     if (Known.isUnknown())
7318       break;
7319     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
7320 
7321     // Only known if known in both the LHS and RHS.
7322     Known = KnownBits::commonBits(Known, Known2);
7323     break;
7324   }
7325   case RISCVISD::REMUW: {
7326     KnownBits Known2;
7327     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
7328     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
7329     // We only care about the lower 32 bits.
7330     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
7331     // Restore the original width by sign extending.
7332     Known = Known.sext(BitWidth);
7333     break;
7334   }
7335   case RISCVISD::DIVUW: {
7336     KnownBits Known2;
7337     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
7338     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
7339     // We only care about the lower 32 bits.
7340     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
7341     // Restore the original width by sign extending.
7342     Known = Known.sext(BitWidth);
7343     break;
7344   }
7345   case RISCVISD::CTZW: {
7346     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
7347     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
7348     unsigned LowBits = Log2_32(PossibleTZ) + 1;
7349     Known.Zero.setBitsFrom(LowBits);
7350     break;
7351   }
7352   case RISCVISD::CLZW: {
7353     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
7354     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
7355     unsigned LowBits = Log2_32(PossibleLZ) + 1;
7356     Known.Zero.setBitsFrom(LowBits);
7357     break;
7358   }
7359   case RISCVISD::GREV:
7360   case RISCVISD::GREVW: {
7361     if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7362       Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
7363       if (Opc == RISCVISD::GREVW)
7364         Known = Known.trunc(32);
7365       unsigned ShAmt = C->getZExtValue();
7366       computeGREV(Known.Zero, ShAmt);
7367       computeGREV(Known.One, ShAmt);
7368       if (Opc == RISCVISD::GREVW)
7369         Known = Known.sext(BitWidth);
7370     }
7371     break;
7372   }
7373   case RISCVISD::READ_VLENB:
7374     // We assume VLENB is at least 16 bytes.
7375     Known.Zero.setLowBits(4);
7376     // We assume VLENB is no more than 65536 / 8 bytes.
7377     Known.Zero.setBitsFrom(14);
7378     break;
7379   case ISD::INTRINSIC_W_CHAIN: {
7380     unsigned IntNo = Op.getConstantOperandVal(1);
7381     switch (IntNo) {
7382     default:
7383       // We can't do anything for most intrinsics.
7384       break;
7385     case Intrinsic::riscv_vsetvli:
7386     case Intrinsic::riscv_vsetvlimax:
7387       // Assume that VL output is positive and would fit in an int32_t.
7388       // TODO: VLEN might be capped at 16 bits in a future V spec update.
7389       if (BitWidth >= 32)
7390         Known.Zero.setBitsFrom(31);
7391       break;
7392     }
7393     break;
7394   }
7395   }
7396 }
7397 
7398 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
7399     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7400     unsigned Depth) const {
7401   switch (Op.getOpcode()) {
7402   default:
7403     break;
7404   case RISCVISD::SELECT_CC: {
7405     unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
7406     if (Tmp == 1) return 1;  // Early out.
7407     unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
7408     return std::min(Tmp, Tmp2);
7409   }
7410   case RISCVISD::SLLW:
7411   case RISCVISD::SRAW:
7412   case RISCVISD::SRLW:
7413   case RISCVISD::DIVW:
7414   case RISCVISD::DIVUW:
7415   case RISCVISD::REMUW:
7416   case RISCVISD::ROLW:
7417   case RISCVISD::RORW:
7418   case RISCVISD::GREVW:
7419   case RISCVISD::GORCW:
7420   case RISCVISD::FSLW:
7421   case RISCVISD::FSRW:
7422   case RISCVISD::SHFLW:
7423   case RISCVISD::UNSHFLW:
7424   case RISCVISD::BCOMPRESSW:
7425   case RISCVISD::BDECOMPRESSW:
7426   case RISCVISD::FCVT_W_RTZ_RV64:
7427   case RISCVISD::FCVT_WU_RTZ_RV64:
7428     // TODO: As the result is sign-extended, this is conservatively correct. A
7429     // more precise answer could be calculated for SRAW depending on known
7430     // bits in the shift amount.
7431     return 33;
7432   case RISCVISD::SHFL:
7433   case RISCVISD::UNSHFL: {
7434     // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
7435     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
7436     // will stay within the upper 32 bits. If there were more than 32 sign bits
7437     // before there will be at least 33 sign bits after.
7438     if (Op.getValueType() == MVT::i64 &&
7439         isa<ConstantSDNode>(Op.getOperand(1)) &&
7440         (Op.getConstantOperandVal(1) & 0x10) == 0) {
7441       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
7442       if (Tmp > 32)
7443         return 33;
7444     }
7445     break;
7446   }
7447   case RISCVISD::VMV_X_S:
7448     // The number of sign bits of the scalar result is computed by obtaining the
7449     // element type of the input vector operand, subtracting its width from the
7450     // XLEN, and then adding one (sign bit within the element type). If the
7451     // element type is wider than XLen, the least-significant XLEN bits are
7452     // taken.
7453     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
7454       return 1;
7455     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
7456   }
7457 
7458   return 1;
7459 }
7460 
7461 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
7462                                                   MachineBasicBlock *BB) {
7463   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
7464 
7465   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
7466   // Should the count have wrapped while it was being read, we need to try
7467   // again.
7468   // ...
7469   // read:
7470   // rdcycleh x3 # load high word of cycle
7471   // rdcycle  x2 # load low word of cycle
7472   // rdcycleh x4 # load high word of cycle
7473   // bne x3, x4, read # check if high word reads match, otherwise try again
7474   // ...
7475 
7476   MachineFunction &MF = *BB->getParent();
7477   const BasicBlock *LLVM_BB = BB->getBasicBlock();
7478   MachineFunction::iterator It = ++BB->getIterator();
7479 
7480   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
7481   MF.insert(It, LoopMBB);
7482 
7483   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
7484   MF.insert(It, DoneMBB);
7485 
7486   // Transfer the remainder of BB and its successor edges to DoneMBB.
7487   DoneMBB->splice(DoneMBB->begin(), BB,
7488                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
7489   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
7490 
7491   BB->addSuccessor(LoopMBB);
7492 
7493   MachineRegisterInfo &RegInfo = MF.getRegInfo();
7494   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
7495   Register LoReg = MI.getOperand(0).getReg();
7496   Register HiReg = MI.getOperand(1).getReg();
7497   DebugLoc DL = MI.getDebugLoc();
7498 
7499   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7500   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
7501       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
7502       .addReg(RISCV::X0);
7503   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
7504       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
7505       .addReg(RISCV::X0);
7506   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
7507       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
7508       .addReg(RISCV::X0);
7509 
7510   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
7511       .addReg(HiReg)
7512       .addReg(ReadAgainReg)
7513       .addMBB(LoopMBB);
7514 
7515   LoopMBB->addSuccessor(LoopMBB);
7516   LoopMBB->addSuccessor(DoneMBB);
7517 
7518   MI.eraseFromParent();
7519 
7520   return DoneMBB;
7521 }
7522 
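// Lower SplitF64Pseudo by spilling the FPR64 source to a stack slot and
// reloading the low and high halves into the two GPR results with 32-bit
// loads.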
7523 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
7524                                              MachineBasicBlock *BB) {
7525   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
7526 
7527   MachineFunction &MF = *BB->getParent();
7528   DebugLoc DL = MI.getDebugLoc();
7529   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7530   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
7531   Register LoReg = MI.getOperand(0).getReg();
7532   Register HiReg = MI.getOperand(1).getReg();
7533   Register SrcReg = MI.getOperand(2).getReg();
7534   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
7535   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
7536 
7537   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
7538                           RI);
7539   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
7540   MachineMemOperand *MMOLo =
7541       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
7542   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
7543       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
7544   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
7545       .addFrameIndex(FI)
7546       .addImm(0)
7547       .addMemOperand(MMOLo);
7548   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
7549       .addFrameIndex(FI)
7550       .addImm(4)
7551       .addMemOperand(MMOHi);
7552   MI.eraseFromParent(); // The pseudo instruction is gone now.
7553   return BB;
7554 }
7555 
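// Lower BuildPairF64Pseudo by storing the two GPR halves to a stack slot with
// 32-bit stores and reloading the combined value into the FPR64 result.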
7556 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
7557                                                  MachineBasicBlock *BB) {
7558   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
7559          "Unexpected instruction");
7560 
7561   MachineFunction &MF = *BB->getParent();
7562   DebugLoc DL = MI.getDebugLoc();
7563   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7564   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
7565   Register DstReg = MI.getOperand(0).getReg();
7566   Register LoReg = MI.getOperand(1).getReg();
7567   Register HiReg = MI.getOperand(2).getReg();
7568   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
7569   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
7570 
7571   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
7572   MachineMemOperand *MMOLo =
7573       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
7574   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
7575       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
7576   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
7577       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
7578       .addFrameIndex(FI)
7579       .addImm(0)
7580       .addMemOperand(MMOLo);
7581   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
7582       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
7583       .addFrameIndex(FI)
7584       .addImm(4)
7585       .addMemOperand(MMOHi);
7586   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
7587   MI.eraseFromParent(); // The pseudo instruction is gone now.
7588   return BB;
7589 }
7590 
7591 static bool isSelectPseudo(MachineInstr &MI) {
7592   switch (MI.getOpcode()) {
7593   default:
7594     return false;
7595   case RISCV::Select_GPR_Using_CC_GPR:
7596   case RISCV::Select_FPR16_Using_CC_GPR:
7597   case RISCV::Select_FPR32_Using_CC_GPR:
7598   case RISCV::Select_FPR64_Using_CC_GPR:
7599     return true;
7600   }
7601 }
7602 
7603 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
7604                                            MachineBasicBlock *BB,
7605                                            const RISCVSubtarget &Subtarget) {
7606   // To "insert" Select_* instructions, we actually have to insert the triangle
7607   // control-flow pattern.  The incoming instructions know the destination vreg
7608   // to set, the condition code register to branch on, the true/false values to
7609   // select between, and the condcode to use to select the appropriate branch.
7610   //
7611   // We produce the following control flow:
7612   //     HeadMBB
7613   //     |  \
7614   //     |  IfFalseMBB
7615   //     | /
7616   //    TailMBB
7617   //
7618   // When we find a sequence of selects we attempt to optimize their emission
7619   // by sharing the control flow. Currently we only handle cases where we have
7620   // multiple selects with the exact same condition (same LHS, RHS and CC).
7621   // The selects may be interleaved with other instructions if the other
7622   // instructions meet some requirements we deem safe:
7623   // - They are debug instructions. Otherwise,
7624   // - They do not have side-effects, do not access memory and their inputs do
7625   //   not depend on the results of the select pseudo-instructions.
7626   // The TrueV/FalseV operands of the selects cannot depend on the result of
7627   // previous selects in the sequence.
7628   // These conditions could be further relaxed. See the X86 target for a
7629   // related approach and more information.
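  // As an illustrative sketch of the shared-flow case (operand order as used
  // below: dst, LHS, RHS, CC, TrueV, FalseV), two pseudos such as
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, CC, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, CC, %t1, %f1
  // share a single conditional branch: both results become PHIs in TailMBB,
  // taking their true values along the HeadMBB edge and their false values
  // along the IfFalseMBB edge.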
7630   Register LHS = MI.getOperand(1).getReg();
7631   Register RHS = MI.getOperand(2).getReg();
7632   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
7633 
7634   SmallVector<MachineInstr *, 4> SelectDebugValues;
7635   SmallSet<Register, 4> SelectDests;
7636   SelectDests.insert(MI.getOperand(0).getReg());
7637 
7638   MachineInstr *LastSelectPseudo = &MI;
7639 
7640   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7641        SequenceMBBI != E; ++SequenceMBBI) {
7642     if (SequenceMBBI->isDebugInstr())
7643       continue;
7644     else if (isSelectPseudo(*SequenceMBBI)) {
7645       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7646           SequenceMBBI->getOperand(2).getReg() != RHS ||
7647           SequenceMBBI->getOperand(3).getImm() != CC ||
7648           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7649           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7650         break;
7651       LastSelectPseudo = &*SequenceMBBI;
7652       SequenceMBBI->collectDebugValues(SelectDebugValues);
7653       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7654     } else {
7655       if (SequenceMBBI->hasUnmodeledSideEffects() ||
7656           SequenceMBBI->mayLoadOrStore())
7657         break;
7658       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7659             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7660           }))
7661         break;
7662     }
7663   }
7664 
7665   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
7666   const BasicBlock *LLVM_BB = BB->getBasicBlock();
7667   DebugLoc DL = MI.getDebugLoc();
7668   MachineFunction::iterator I = ++BB->getIterator();
7669 
7670   MachineBasicBlock *HeadMBB = BB;
7671   MachineFunction *F = BB->getParent();
7672   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7673   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7674 
7675   F->insert(I, IfFalseMBB);
7676   F->insert(I, TailMBB);
7677 
7678   // Transfer debug instructions associated with the selects to TailMBB.
7679   for (MachineInstr *DebugInstr : SelectDebugValues) {
7680     TailMBB->push_back(DebugInstr->removeFromParent());
7681   }
7682 
7683   // Move all instructions after the sequence to TailMBB.
7684   TailMBB->splice(TailMBB->end(), HeadMBB,
7685                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7686   // Update machine-CFG edges by transferring all successors of the current
7687   // block to the new block which will contain the Phi nodes for the selects.
7688   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7689   // Set the successors for HeadMBB.
7690   HeadMBB->addSuccessor(IfFalseMBB);
7691   HeadMBB->addSuccessor(TailMBB);
7692 
7693   // Insert appropriate branch.
7694   BuildMI(HeadMBB, DL, TII.getBrCond(CC))
7695     .addReg(LHS)
7696     .addReg(RHS)
7697     .addMBB(TailMBB);
7698 
7699   // IfFalseMBB just falls through to TailMBB.
7700   IfFalseMBB->addSuccessor(TailMBB);
7701 
7702   // Create PHIs for all of the select pseudo-instructions.
7703   auto SelectMBBI = MI.getIterator();
7704   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7705   auto InsertionPoint = TailMBB->begin();
7706   while (SelectMBBI != SelectEnd) {
7707     auto Next = std::next(SelectMBBI);
7708     if (isSelectPseudo(*SelectMBBI)) {
7709       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7710       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7711               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
7712           .addReg(SelectMBBI->getOperand(4).getReg())
7713           .addMBB(HeadMBB)
7714           .addReg(SelectMBBI->getOperand(5).getReg())
7715           .addMBB(IfFalseMBB);
7716       SelectMBBI->eraseFromParent();
7717     }
7718     SelectMBBI = Next;
7719   }
7720 
7721   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
7722   return TailMBB;
7723 }
7724 
7725 MachineBasicBlock *
7726 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
7727                                                  MachineBasicBlock *BB) const {
7728   switch (MI.getOpcode()) {
7729   default:
7730     llvm_unreachable("Unexpected instr type to insert");
7731   case RISCV::ReadCycleWide:
7732     assert(!Subtarget.is64Bit() &&
7733            "ReadCycleWide is only to be used on riscv32");
7734     return emitReadCycleWidePseudo(MI, BB);
7735   case RISCV::Select_GPR_Using_CC_GPR:
7736   case RISCV::Select_FPR16_Using_CC_GPR:
7737   case RISCV::Select_FPR32_Using_CC_GPR:
7738   case RISCV::Select_FPR64_Using_CC_GPR:
7739     return emitSelectPseudo(MI, BB, Subtarget);
7740   case RISCV::BuildPairF64Pseudo:
7741     return emitBuildPairF64Pseudo(MI, BB);
7742   case RISCV::SplitF64Pseudo:
7743     return emitSplitF64Pseudo(MI, BB);
7744   }
7745 }
7746 
7747 // Calling Convention Implementation.
7748 // The expectations for frontend ABI lowering vary from target to target.
7749 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
7750 // details, but this is a longer term goal. For now, we simply try to keep the
7751 // role of the frontend as simple and well-defined as possible. The rules can
7752 // be summarised as:
7753 // * Never split up large scalar arguments. We handle them here.
7754 // * If a hardfloat calling convention is being used, and the struct may be
7755 // passed in a pair of registers (fp+fp, int+fp), and both registers are
7756 // available, then pass as two separate arguments. If either the GPRs or FPRs
7757 // are exhausted, then pass according to the rule below.
7758 // * If a struct could never be passed in registers or directly in a stack
7759 // slot (as it is larger than 2*XLEN and the floating point rules don't
7760 // apply), then pass it using a pointer with the byval attribute.
7761 // * If a struct is less than 2*XLEN, then coerce to either a two-element
7762 // word-sized array or a 2*XLEN scalar (depending on alignment).
7763 // * The frontend can determine whether a struct is returned by reference or
7764 // not based on its size and fields. If it will be returned by reference, the
7765 // frontend must modify the prototype so a pointer with the sret annotation is
7766 // passed as the first argument. This is not necessary for large scalar
7767 // returns.
7768 // * Struct return values and varargs should be coerced to structs containing
7769 // register-size fields in the same situations they would be for fixed
7770 // arguments.
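//
// As an illustrative example of the rules above (a sketch, not a restatement
// of the full psABI): under a hard-float 'd' ABI a struct containing two
// doubles is passed as two separate f64 arguments while FPRs remain
// available, whereas a struct larger than 2*XLEN to which the floating point
// rules don't apply is passed by the frontend as a pointer with the byval
// attribute.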
7771 
7772 static const MCPhysReg ArgGPRs[] = {
7773   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
7774   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
7775 };
7776 static const MCPhysReg ArgFPR16s[] = {
7777   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
7778   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
7779 };
7780 static const MCPhysReg ArgFPR32s[] = {
7781   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
7782   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
7783 };
7784 static const MCPhysReg ArgFPR64s[] = {
7785   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
7786   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
7787 };
7788 // This is an interim calling convention and it may be changed in the future.
7789 static const MCPhysReg ArgVRs[] = {
7790     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
7791     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
7792     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
7793 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
7794                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
7795                                      RISCV::V20M2, RISCV::V22M2};
7796 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
7797                                      RISCV::V20M4};
7798 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
7799 
7800 // Pass a 2*XLEN argument that has been split into two XLEN values through
7801 // registers or the stack as necessary.
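// For example (illustrative only), an i64 split on RV32: if two GPRs remain,
// both halves are passed in registers; if only one remains, the first half
// takes that GPR and the second half goes to the stack; otherwise both halves
// go to the stack, the first with the argument's original alignment.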
7802 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
7803                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
7804                                 MVT ValVT2, MVT LocVT2,
7805                                 ISD::ArgFlagsTy ArgFlags2) {
7806   unsigned XLenInBytes = XLen / 8;
7807   if (Register Reg = State.AllocateReg(ArgGPRs)) {
7808     // At least one half can be passed via register.
7809     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7810                                      VA1.getLocVT(), CCValAssign::Full));
7811   } else {
7812     // Both halves must be passed on the stack, with proper alignment.
7813     Align StackAlign =
7814         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7815     State.addLoc(
7816         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7817                             State.AllocateStack(XLenInBytes, StackAlign),
7818                             VA1.getLocVT(), CCValAssign::Full));
7819     State.addLoc(CCValAssign::getMem(
7820         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
7821         LocVT2, CCValAssign::Full));
7822     return false;
7823   }
7824 
7825   if (Register Reg = State.AllocateReg(ArgGPRs)) {
7826     // The second half can also be passed via register.
7827     State.addLoc(
7828         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7829   } else {
7830     // The second half is passed via the stack, without additional alignment.
7831     State.addLoc(CCValAssign::getMem(
7832         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
7833         LocVT2, CCValAssign::Full));
7834   }
7835 
7836   return false;
7837 }
7838 
7839 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
7840                                Optional<unsigned> FirstMaskArgument,
7841                                CCState &State, const RISCVTargetLowering &TLI) {
7842   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
7843   if (RC == &RISCV::VRRegClass) {
7844     // Assign the first mask argument to V0.
7845     // This is an interim calling convention and it may be changed in the
7846     // future.
7847     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
7848       return State.AllocateReg(RISCV::V0);
7849     return State.AllocateReg(ArgVRs);
7850   }
7851   if (RC == &RISCV::VRM2RegClass)
7852     return State.AllocateReg(ArgVRM2s);
7853   if (RC == &RISCV::VRM4RegClass)
7854     return State.AllocateReg(ArgVRM4s);
7855   if (RC == &RISCV::VRM8RegClass)
7856     return State.AllocateReg(ArgVRM8s);
7857   llvm_unreachable("Unhandled register class for ValueType");
7858 }
7859 
7860 // Implements the RISC-V calling convention. Returns true upon failure.
7861 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
7862                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
7863                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
7864                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
7865                      Optional<unsigned> FirstMaskArgument) {
7866   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
7867   assert(XLen == 32 || XLen == 64);
7868   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
7869 
7870   // Any return value split into more than two values can't be returned
7871   // directly. Vectors are returned via the available vector registers.
7872   if (!LocVT.isVector() && IsRet && ValNo > 1)
7873     return true;
7874 
7875   // Use GPRs for F16/F32 values if targeting one of the soft-float ABIs, if
7876   // passing a variadic argument, or if no F16/F32 argument registers remain.
7877   bool UseGPRForF16_F32 = true;
7878   // Use GPRs for F64 values if targeting a soft-float or FLEN=32 ABI, if
7879   // passing a variadic argument, or if no F64 argument registers remain.
7880   bool UseGPRForF64 = true;
7881 
7882   switch (ABI) {
7883   default:
7884     llvm_unreachable("Unexpected ABI");
7885   case RISCVABI::ABI_ILP32:
7886   case RISCVABI::ABI_LP64:
7887     break;
7888   case RISCVABI::ABI_ILP32F:
7889   case RISCVABI::ABI_LP64F:
7890     UseGPRForF16_F32 = !IsFixed;
7891     break;
7892   case RISCVABI::ABI_ILP32D:
7893   case RISCVABI::ABI_LP64D:
7894     UseGPRForF16_F32 = !IsFixed;
7895     UseGPRForF64 = !IsFixed;
7896     break;
7897   }
7898 
7899   // FPR16, FPR32, and FPR64 alias each other, so if the F32 argument
       // registers are exhausted, no FP argument registers of any width remain.
7900   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
7901     UseGPRForF16_F32 = true;
7902     UseGPRForF64 = true;
7903   }
7904 
7905   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
7906   // similar local variables rather than directly checking against the target
7907   // ABI.
7908 
7909   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
7910     LocVT = XLenVT;
7911     LocInfo = CCValAssign::BCvt;
7912   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
7913     LocVT = MVT::i64;
7914     LocInfo = CCValAssign::BCvt;
7915   }
7916 
7917   // If this is a variadic argument, the RISC-V calling convention requires
7918   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
7919   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
7920   // be used regardless of whether the original argument was split during
7921   // legalisation or not. The argument will not be passed by registers if the
7922   // original type is larger than 2*XLEN, so the register alignment rule does
7923   // not apply.
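  // As an illustrative example on RV32: a variadic double that follows a
  // single fixed i32 argument would otherwise land in a1/a2; the check below
  // skips a1 so that it is passed in the aligned pair a2/a3.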
7924   unsigned TwoXLenInBytes = (2 * XLen) / 8;
7925   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
7926       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
7927     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7928     // Skip 'odd' register if necessary.
7929     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
7930       State.AllocateReg(ArgGPRs);
7931   }
7932 
7933   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7934   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7935       State.getPendingArgFlags();
7936 
7937   assert(PendingLocs.size() == PendingArgFlags.size() &&
7938          "PendingLocs and PendingArgFlags out of sync");
7939 
7940   // Handle passing f64 on RV32D with a soft float ABI or when floating point
7941   // registers are exhausted.
7942   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
7943     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
7944            "Can't lower f64 if it is split");
7945     // Depending on available argument GPRs, f64 may be passed in a pair of
7946     // GPRs, split between a GPR and the stack, or passed completely on the
7947     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7948     // cases.
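    // For example (illustrative only): with a4 and a5 still free the value
    // takes both; with only a7 free the low half goes in a7 and the high half
    // takes a 4-byte stack slot; with no GPRs free the whole f64 takes an
    // 8-byte, 8-aligned stack slot.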
7949     Register Reg = State.AllocateReg(ArgGPRs);
7950     LocVT = MVT::i32;
7951     if (!Reg) {
7952       unsigned StackOffset = State.AllocateStack(8, Align(8));
7953       State.addLoc(
7954           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7955       return false;
7956     }
7957     if (!State.AllocateReg(ArgGPRs))
7958       State.AllocateStack(4, Align(4));
7959     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7960     return false;
7961   }
7962 
7963   // Fixed-length vectors are located in the corresponding scalable-vector
7964   // container types.
7965   if (ValVT.isFixedLengthVector())
7966     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
7967 
7968   // Split arguments might be passed indirectly, so keep track of the pending
7969   // values. Split vectors are passed via a mix of registers and indirectly, so
7970   // treat them as we would any other argument.
7971   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7972     LocVT = XLenVT;
7973     LocInfo = CCValAssign::Indirect;
7974     PendingLocs.push_back(
7975         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7976     PendingArgFlags.push_back(ArgFlags);
7977     if (!ArgFlags.isSplitEnd()) {
7978       return false;
7979     }
7980   }
7981 
7982   // If the split argument only had two elements, it should be passed directly
7983   // in registers or on the stack.
7984   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7985       PendingLocs.size() <= 2) {
7986     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7987     // Apply the normal calling convention rules to the first half of the
7988     // split argument.
7989     CCValAssign VA = PendingLocs[0];
7990     ISD::ArgFlagsTy AF = PendingArgFlags[0];
7991     PendingLocs.clear();
7992     PendingArgFlags.clear();
7993     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
7994                                ArgFlags);
7995   }
7996 
7997   // Allocate to a register if possible, or else a stack slot.
7998   Register Reg;
7999   unsigned StoreSizeBytes = XLen / 8;
8000   Align StackAlign = Align(XLen / 8);
8001 
8002   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
8003     Reg = State.AllocateReg(ArgFPR16s);
8004   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
8005     Reg = State.AllocateReg(ArgFPR32s);
8006   else if (ValVT == MVT::f64 && !UseGPRForF64)
8007     Reg = State.AllocateReg(ArgFPR64s);
8008   else if (ValVT.isVector()) {
8009     Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
8010     if (!Reg) {
8011       // For return values, the vector must be passed fully via registers or
8012       // via the stack.
8013       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
8014       // but we're using all of them.
8015       if (IsRet)
8016         return true;
8017       // Try using a GPR to pass the address
8018       if ((Reg = State.AllocateReg(ArgGPRs))) {
8019         LocVT = XLenVT;
8020         LocInfo = CCValAssign::Indirect;
8021       } else if (ValVT.isScalableVector()) {
8022         report_fatal_error("Unable to pass scalable vector types on the stack");
8023       } else {
8024         // Pass fixed-length vectors on the stack.
8025         LocVT = ValVT;
8026         StoreSizeBytes = ValVT.getStoreSize();
8027         // Align vectors to their element sizes, being careful for vXi1
8028         // vectors.
8029         StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
8030       }
8031     }
8032   } else {
8033     Reg = State.AllocateReg(ArgGPRs);
8034   }
8035 
8036   unsigned StackOffset =
8037       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8038 
8039   // If we reach this point and PendingLocs is non-empty, we must be at the
8040   // end of a split argument that must be passed indirectly.
8041   if (!PendingLocs.empty()) {
8042     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8043     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8044 
8045     for (auto &It : PendingLocs) {
8046       if (Reg)
8047         It.convertToReg(Reg);
8048       else
8049         It.convertToMem(StackOffset);
8050       State.addLoc(It);
8051     }
8052     PendingLocs.clear();
8053     PendingArgFlags.clear();
8054     return false;
8055   }
8056 
8057   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
8058           (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
8059          "Expected an XLenVT or vector types at this stage");
8060 
8061   if (Reg) {
8062     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8063     return false;
8064   }
8065 
8066   // When a floating-point value is passed on the stack, no bit-conversion is
8067   // needed.
8068   if (ValVT.isFloatingPoint()) {
8069     LocVT = ValVT;
8070     LocInfo = CCValAssign::Full;
8071   }
8072   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8073   return false;
8074 }
8075 
8076 template <typename ArgTy>
8077 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
8078   for (const auto &ArgIdx : enumerate(Args)) {
8079     MVT ArgVT = ArgIdx.value().VT;
8080     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
8081       return ArgIdx.index();
8082   }
8083   return None;
8084 }
8085 
8086 void RISCVTargetLowering::analyzeInputArgs(
8087     MachineFunction &MF, CCState &CCInfo,
8088     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8089     RISCVCCAssignFn Fn) const {
8090   unsigned NumArgs = Ins.size();
8091   FunctionType *FType = MF.getFunction().getFunctionType();
8092 
8093   Optional<unsigned> FirstMaskArgument;
8094   if (Subtarget.hasStdExtV())
8095     FirstMaskArgument = preAssignMask(Ins);
8096 
8097   for (unsigned i = 0; i != NumArgs; ++i) {
8098     MVT ArgVT = Ins[i].VT;
8099     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
8100 
8101     Type *ArgTy = nullptr;
8102     if (IsRet)
8103       ArgTy = FType->getReturnType();
8104     else if (Ins[i].isOrigArg())
8105       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8106 
8107     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8108     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8109            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
8110            FirstMaskArgument)) {
8111       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
8112                         << EVT(ArgVT).getEVTString() << '\n');
8113       llvm_unreachable(nullptr);
8114     }
8115   }
8116 }
8117 
8118 void RISCVTargetLowering::analyzeOutputArgs(
8119     MachineFunction &MF, CCState &CCInfo,
8120     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8121     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
8122   unsigned NumArgs = Outs.size();
8123 
8124   Optional<unsigned> FirstMaskArgument;
8125   if (Subtarget.hasStdExtV())
8126     FirstMaskArgument = preAssignMask(Outs);
8127 
8128   for (unsigned i = 0; i != NumArgs; i++) {
8129     MVT ArgVT = Outs[i].VT;
8130     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
8131     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8132 
8133     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8134     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
8135            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
8136            FirstMaskArgument)) {
8137       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
8138                         << EVT(ArgVT).getEVTString() << "\n");
8139       llvm_unreachable(nullptr);
8140     }
8141   }
8142 }
8143 
8144 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8145 // values.
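// For example, under an LP64 ABI an f32 value assigned CCValAssign::BCvt
// arrives in the low 32 bits of an i64 GPR and is moved back with
// FMV_W_X_RV64, while the same f32 case on RV32 is a plain i32 bitcast.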
8146 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8147                                    const CCValAssign &VA, const SDLoc &DL,
8148                                    const RISCVSubtarget &Subtarget) {
8149   switch (VA.getLocInfo()) {
8150   default:
8151     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8152   case CCValAssign::Full:
8153     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
8154       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
8155     break;
8156   case CCValAssign::BCvt:
8157     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
8158       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
8159     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8160       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
8161     else
8162       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8163     break;
8164   }
8165   return Val;
8166 }
8167 
8168 // The caller is responsible for loading the full value if the argument is
8169 // passed with CCValAssign::Indirect.
8170 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8171                                 const CCValAssign &VA, const SDLoc &DL,
8172                                 const RISCVTargetLowering &TLI) {
8173   MachineFunction &MF = DAG.getMachineFunction();
8174   MachineRegisterInfo &RegInfo = MF.getRegInfo();
8175   EVT LocVT = VA.getLocVT();
8176   SDValue Val;
8177   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8178   Register VReg = RegInfo.createVirtualRegister(RC);
8179   RegInfo.addLiveIn(VA.getLocReg(), VReg);
8180   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8181 
8182   if (VA.getLocInfo() == CCValAssign::Indirect)
8183     return Val;
8184 
8185   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
8186 }
8187 
8188 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8189                                    const CCValAssign &VA, const SDLoc &DL,
8190                                    const RISCVSubtarget &Subtarget) {
8191   EVT LocVT = VA.getLocVT();
8192 
8193   switch (VA.getLocInfo()) {
8194   default:
8195     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8196   case CCValAssign::Full:
8197     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
8198       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
8199     break;
8200   case CCValAssign::BCvt:
8201     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
8202       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
8203     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8204       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
8205     else
8206       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8207     break;
8208   }
8209   return Val;
8210 }
8211 
8212 // The caller is responsible for loading the full value if the argument is
8213 // passed with CCValAssign::Indirect.
8214 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
8215                                 const CCValAssign &VA, const SDLoc &DL) {
8216   MachineFunction &MF = DAG.getMachineFunction();
8217   MachineFrameInfo &MFI = MF.getFrameInfo();
8218   EVT LocVT = VA.getLocVT();
8219   EVT ValVT = VA.getValVT();
8220   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
8221   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8222                                  /*Immutable=*/true);
8223   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
8224   SDValue Val;
8225 
8226   ISD::LoadExtType ExtType;
8227   switch (VA.getLocInfo()) {
8228   default:
8229     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8230   case CCValAssign::Full:
8231   case CCValAssign::Indirect:
8232   case CCValAssign::BCvt:
8233     ExtType = ISD::NON_EXTLOAD;
8234     break;
8235   }
8236   Val = DAG.getExtLoad(
8237       ExtType, DL, LocVT, Chain, FIN,
8238       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
8239   return Val;
8240 }
8241 
8242 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
8243                                        const CCValAssign &VA, const SDLoc &DL) {
8244   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8245          "Unexpected VA");
8246   MachineFunction &MF = DAG.getMachineFunction();
8247   MachineFrameInfo &MFI = MF.getFrameInfo();
8248   MachineRegisterInfo &RegInfo = MF.getRegInfo();
8249 
8250   if (VA.isMemLoc()) {
8251     // f64 is passed on the stack.
8252     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
8253     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8254     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
8255                        MachinePointerInfo::getFixedStack(MF, FI));
8256   }
8257 
8258   assert(VA.isRegLoc() && "Expected register VA assignment");
8259 
8260   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
8261   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8262   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8263   SDValue Hi;
8264   if (VA.getLocReg() == RISCV::X17) {
8265     // Second half of f64 is passed on the stack.
8266     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
8267     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8268     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8269                      MachinePointerInfo::getFixedStack(MF, FI));
8270   } else {
8271     // Second half of f64 is passed in another GPR.
8272     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
8273     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
8274     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8275   }
8276   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8277 }
8278 
8279 // FastCC has less than 1% performance improvement for some particular
8280 // benchmarks. But theoretically, it may have a benefit in some cases.
8281 static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
8282                             unsigned ValNo, MVT ValVT, MVT LocVT,
8283                             CCValAssign::LocInfo LocInfo,
8284                             ISD::ArgFlagsTy ArgFlags, CCState &State,
8285                             bool IsFixed, bool IsRet, Type *OrigTy,
8286                             const RISCVTargetLowering &TLI,
8287                             Optional<unsigned> FirstMaskArgument) {
8288 
8289   // X5 and X6 might be used for the save-restore libcalls.
8290   static const MCPhysReg GPRList[] = {
8291       RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
8292       RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
8293       RISCV::X29, RISCV::X30, RISCV::X31};
8294 
8295   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8296     if (unsigned Reg = State.AllocateReg(GPRList)) {
8297       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8298       return false;
8299     }
8300   }
8301 
8302   if (LocVT == MVT::f16) {
8303     static const MCPhysReg FPR16List[] = {
8304         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
8305         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
8306         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
8307         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
8308     if (unsigned Reg = State.AllocateReg(FPR16List)) {
8309       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8310       return false;
8311     }
8312   }
8313 
8314   if (LocVT == MVT::f32) {
8315     static const MCPhysReg FPR32List[] = {
8316         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
8317         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
8318         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
8319         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
8320     if (unsigned Reg = State.AllocateReg(FPR32List)) {
8321       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8322       return false;
8323     }
8324   }
8325 
8326   if (LocVT == MVT::f64) {
8327     static const MCPhysReg FPR64List[] = {
8328         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
8329         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
8330         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
8331         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
8332     if (unsigned Reg = State.AllocateReg(FPR64List)) {
8333       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8334       return false;
8335     }
8336   }
8337 
8338   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
8339     unsigned Offset4 = State.AllocateStack(4, Align(4));
8340     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
8341     return false;
8342   }
8343 
8344   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
8345     unsigned Offset5 = State.AllocateStack(8, Align(8));
8346     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
8347     return false;
8348   }
8349 
8350   if (LocVT.isVector()) {
8351     if (unsigned Reg =
8352             allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
8353       // Fixed-length vectors are located in the corresponding scalable-vector
8354       // container types.
8355       if (ValVT.isFixedLengthVector())
8356         LocVT = TLI.getContainerForFixedLengthVector(LocVT);
8357       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8358     } else {
8359       // Try and pass the address via a "fast" GPR.
8360       if (unsigned GPRReg = State.AllocateReg(GPRList)) {
8361         LocInfo = CCValAssign::Indirect;
8362         LocVT = TLI.getSubtarget().getXLenVT();
8363         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
8364       } else if (ValVT.isFixedLengthVector()) {
8365         auto StackAlign =
8366             MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
8367         unsigned StackOffset =
8368             State.AllocateStack(ValVT.getStoreSize(), StackAlign);
8369         State.addLoc(
8370             CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8371       } else {
8372         // Can't pass scalable vectors on the stack.
8373         return true;
8374       }
8375     }
8376 
8377     return false;
8378   }
8379 
8380   return true; // CC didn't match.
8381 }
8382 
8383 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8384                          CCValAssign::LocInfo LocInfo,
8385                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
8386 
8387   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8388     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
8389     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
8390     static const MCPhysReg GPRList[] = {
8391         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
8392         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
8393     if (unsigned Reg = State.AllocateReg(GPRList)) {
8394       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8395       return false;
8396     }
8397   }
8398 
8399   if (LocVT == MVT::f32) {
8400     // Pass in STG registers: F1, ..., F6
8401     //                        fs0 ... fs5
8402     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
8403                                           RISCV::F18_F, RISCV::F19_F,
8404                                           RISCV::F20_F, RISCV::F21_F};
8405     if (unsigned Reg = State.AllocateReg(FPR32List)) {
8406       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8407       return false;
8408     }
8409   }
8410 
8411   if (LocVT == MVT::f64) {
8412     // Pass in STG registers: D1, ..., D6
8413     //                        fs6 ... fs11
8414     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
8415                                           RISCV::F24_D, RISCV::F25_D,
8416                                           RISCV::F26_D, RISCV::F27_D};
8417     if (unsigned Reg = State.AllocateReg(FPR64List)) {
8418       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8419       return false;
8420     }
8421   }
8422 
8423   report_fatal_error("No registers left in GHC calling convention");
8424   return true;
8425 }
8426 
8427 // Transform physical registers into virtual registers.
8428 SDValue RISCVTargetLowering::LowerFormalArguments(
8429     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8430     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8431     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8432 
8433   MachineFunction &MF = DAG.getMachineFunction();
8434 
8435   switch (CallConv) {
8436   default:
8437     report_fatal_error("Unsupported calling convention");
8438   case CallingConv::C:
8439   case CallingConv::Fast:
8440     break;
8441   case CallingConv::GHC:
8442     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
8443         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
8444       report_fatal_error(
8445         "GHC calling convention requires the F and D instruction set extensions");
8446   }
8447 
8448   const Function &Func = MF.getFunction();
8449   if (Func.hasFnAttribute("interrupt")) {
8450     if (!Func.arg_empty())
8451       report_fatal_error(
8452         "Functions with the interrupt attribute cannot have arguments!");
8453 
8454     StringRef Kind =
8455       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
8456 
8457     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
8458       report_fatal_error(
8459         "Function interrupt attribute argument not supported!");
8460   }
8461 
8462   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8463   MVT XLenVT = Subtarget.getXLenVT();
8464   unsigned XLenInBytes = Subtarget.getXLen() / 8;
8465   // Used with varargs to accumulate store chains.
8466   std::vector<SDValue> OutChains;
8467 
8468   // Assign locations to all of the incoming arguments.
8469   SmallVector<CCValAssign, 16> ArgLocs;
8470   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8471 
8472   if (CallConv == CallingConv::GHC)
8473     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
8474   else
8475     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
8476                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
8477                                                    : CC_RISCV);
8478 
8479   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
8480     CCValAssign &VA = ArgLocs[i];
8481     SDValue ArgValue;
8482     // Passing f64 on RV32D with a soft float ABI must be handled as a special
8483     // case.
8484     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
8485       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
8486     else if (VA.isRegLoc())
8487       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
8488     else
8489       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8490 
8491     if (VA.getLocInfo() == CCValAssign::Indirect) {
8492       // If the original argument was split and passed by reference (e.g. i128
8493       // on RV32), we need to load all parts of it here (using the same
8494       // address). Vectors may be partly split to registers and partly to the
8495       // stack, in which case the base address is partly offset and subsequent
8496       // stores are relative to that.
8497       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8498                                    MachinePointerInfo()));
8499       unsigned ArgIndex = Ins[i].OrigArgIndex;
8500       unsigned ArgPartOffset = Ins[i].PartOffset;
8501       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
8502       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
8503         CCValAssign &PartVA = ArgLocs[i + 1];
8504         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
8505         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8506         if (PartVA.getValVT().isScalableVector())
8507           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
8508         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8509         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8510                                      MachinePointerInfo()));
8511         ++i;
8512       }
8513       continue;
8514     }
8515     InVals.push_back(ArgValue);
8516   }
8517 
8518   if (IsVarArg) {
8519     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
8520     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8521     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
8522     MachineFrameInfo &MFI = MF.getFrameInfo();
8523     MachineRegisterInfo &RegInfo = MF.getRegInfo();
8524     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
8525 
8526     // Offset of the first variable argument from stack pointer, and size of
8527     // the vararg save area. For now, the varargs save area is either zero or
8528     // large enough to hold a0-a7.
8529     int VaArgOffset, VarArgsSaveSize;
8530 
8531     // If all registers are allocated, then all varargs must be passed on the
8532     // stack and we don't need to save any argregs.
8533     if (ArgRegs.size() == Idx) {
8534       VaArgOffset = CCInfo.getNextStackOffset();
8535       VarArgsSaveSize = 0;
8536     } else {
8537       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
8538       VaArgOffset = -VarArgsSaveSize;
8539     }
8540 
8541     // Record the frame index of the first variable argument,
8542     // which is a value needed by VASTART.
8543     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
8544     RVFI->setVarArgsFrameIndex(FI);
8545 
8546     // If saving an odd number of registers, create an extra stack slot to
8547     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
8548     // offsets to even-numbered registers remain 2*XLEN-aligned.
8549     if (Idx % 2) {
8550       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
8551       VarArgsSaveSize += XLenInBytes;
8552     }
8553 
8554     // Copy the integer registers that may have been used for passing varargs
8555     // to the vararg save area.
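    // For example, if a0 and a1 already hold fixed arguments (Idx == 2), the
    // loop below spills a2-a7 so that va_arg can walk a contiguous area that
    // continues into any stack-passed varargs.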
8556     for (unsigned I = Idx; I < ArgRegs.size();
8557          ++I, VaArgOffset += XLenInBytes) {
8558       const Register Reg = RegInfo.createVirtualRegister(RC);
8559       RegInfo.addLiveIn(ArgRegs[I], Reg);
8560       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
8561       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
8562       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8563       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8564                                    MachinePointerInfo::getFixedStack(MF, FI));
8565       cast<StoreSDNode>(Store.getNode())
8566           ->getMemOperand()
8567           ->setValue((Value *)nullptr);
8568       OutChains.push_back(Store);
8569     }
8570     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
8571   }
8572 
8573   // All stores are grouped into one node so that the sizes of Ins and InVals
8574   // can be matched. This only happens for vararg functions.
8575   if (!OutChains.empty()) {
8576     OutChains.push_back(Chain);
8577     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8578   }
8579 
8580   return Chain;
8581 }
8582 
8583 /// isEligibleForTailCallOptimization - Check whether the call is eligible
8584 /// for tail call optimization.
8585 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
8586 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
8587     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8588     const SmallVector<CCValAssign, 16> &ArgLocs) const {
8589 
8590   auto &Callee = CLI.Callee;
8591   auto CalleeCC = CLI.CallConv;
8592   auto &Outs = CLI.Outs;
8593   auto &Caller = MF.getFunction();
8594   auto CallerCC = Caller.getCallingConv();
8595 
8596   // Exception-handling functions need a special set of instructions to
8597   // indicate a return to the hardware. Tail-calling another function would
8598   // probably break this.
8599   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
8600   // should be expanded as new function attributes are introduced.
8601   if (Caller.hasFnAttribute("interrupt"))
8602     return false;
8603 
8604   // Do not tail call opt if the stack is used to pass parameters.
8605   if (CCInfo.getNextStackOffset() != 0)
8606     return false;
8607 
8608   // Do not tail call opt if any parameters need to be passed indirectly.
8609   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
8610   // passed indirectly. The address of the value is then passed in a
8611   // register, or, if no register is available, it is put on the stack.
8612   // Passing indirectly often requires allocating stack space to store the
8613   // value, so the CCInfo.getNextStackOffset() != 0 check is not enough and
8614   // we also need to check whether any CCValAssign in ArgLocs is passed
8615   // CCValAssign::Indirect.
8616   for (auto &VA : ArgLocs)
8617     if (VA.getLocInfo() == CCValAssign::Indirect)
8618       return false;
8619 
8620   // Do not tail call opt if either caller or callee uses struct return
8621   // semantics.
8622   auto IsCallerStructRet = Caller.hasStructRetAttr();
8623   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8624   if (IsCallerStructRet || IsCalleeStructRet)
8625     return false;
8626 
8627   // Externally-defined functions with weak linkage should not be
8628   // tail-called. The behaviour of branch instructions in this situation (as
8629   // used for tail calls) is implementation-defined, so we cannot rely on the
8630   // linker replacing the tail call with a return.
8631   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
8632     const GlobalValue *GV = G->getGlobal();
8633     if (GV->hasExternalWeakLinkage())
8634       return false;
8635   }
8636 
8637   // The callee has to preserve all registers the caller needs to preserve.
8638   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
8639   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8640   if (CalleeCC != CallerCC) {
8641     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8642     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8643       return false;
8644   }
8645 
8646   // Byval parameters hand the function a pointer directly into the stack area
8647   // we want to reuse during a tail call. Working around this *is* possible
8648   // but less efficient and uglier in LowerCall.
8649   for (auto &Arg : Outs)
8650     if (Arg.Flags.isByVal())
8651       return false;
8652 
8653   return true;
8654 }
8655 
8656 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8657   return DAG.getDataLayout().getPrefTypeAlign(
8658       VT.getTypeForEVT(*DAG.getContext()));
8659 }
8660 
8661 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8662 // and output parameter nodes.
8663 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
8664                                        SmallVectorImpl<SDValue> &InVals) const {
8665   SelectionDAG &DAG = CLI.DAG;
8666   SDLoc &DL = CLI.DL;
8667   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
8668   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8669   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
8670   SDValue Chain = CLI.Chain;
8671   SDValue Callee = CLI.Callee;
8672   bool &IsTailCall = CLI.IsTailCall;
8673   CallingConv::ID CallConv = CLI.CallConv;
8674   bool IsVarArg = CLI.IsVarArg;
8675   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8676   MVT XLenVT = Subtarget.getXLenVT();
8677 
8678   MachineFunction &MF = DAG.getMachineFunction();
8679 
8680   // Analyze the operands of the call, assigning locations to each operand.
8681   SmallVector<CCValAssign, 16> ArgLocs;
8682   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8683 
8684   if (CallConv == CallingConv::GHC)
8685     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
8686   else
8687     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
8688                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
8689                                                     : CC_RISCV);
8690 
8691   // Check if it's really possible to do a tail call.
8692   if (IsTailCall)
8693     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8694 
8695   if (IsTailCall)
8696     ++NumTailCalls;
8697   else if (CLI.CB && CLI.CB->isMustTailCall())
8698     report_fatal_error("failed to perform tail call elimination on a call "
8699                        "site marked musttail");
8700 
8701   // Get a count of how many bytes are to be pushed on the stack.
8702   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
8703 
8704   // Create local copies for byval args
8705   SmallVector<SDValue, 8> ByValArgs;
8706   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8707     ISD::ArgFlagsTy Flags = Outs[i].Flags;
8708     if (!Flags.isByVal())
8709       continue;
8710 
8711     SDValue Arg = OutVals[i];
8712     unsigned Size = Flags.getByValSize();
8713     Align Alignment = Flags.getNonZeroByValAlign();
8714 
8715     int FI =
8716         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8717     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8718     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
8719 
8720     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8721                           /*IsVolatile=*/false,
8722                           /*AlwaysInline=*/false, IsTailCall,
8723                           MachinePointerInfo(), MachinePointerInfo());
8724     ByValArgs.push_back(FIPtr);
8725   }
8726 
8727   if (!IsTailCall)
8728     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8729 
8730   // Copy argument values to their designated locations.
8731   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
8732   SmallVector<SDValue, 8> MemOpChains;
8733   SDValue StackPtr;
8734   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
8735     CCValAssign &VA = ArgLocs[i];
8736     SDValue ArgValue = OutVals[i];
8737     ISD::ArgFlagsTy Flags = Outs[i].Flags;
8738 
8739     // Handle passing f64 on RV32D with a soft float ABI as a special case.
8740     bool IsF64OnRV32DSoftABI =
8741         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
8742     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
8743       SDValue SplitF64 = DAG.getNode(
8744           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8745       SDValue Lo = SplitF64.getValue(0);
8746       SDValue Hi = SplitF64.getValue(1);
8747 
8748       Register RegLo = VA.getLocReg();
8749       RegsToPass.push_back(std::make_pair(RegLo, Lo));
8750 
8751       if (RegLo == RISCV::X17) {
8752         // Second half of f64 is passed on the stack.
8753         // Work out the address of the stack slot.
8754         if (!StackPtr.getNode())
8755           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
8756         // Emit the store.
8757         MemOpChains.push_back(
8758             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
8759       } else {
8760         // Second half of f64 is passed in another GPR.
8761         assert(RegLo < RISCV::X31 && "Invalid register pair");
8762         Register RegHigh = RegLo + 1;
8763         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8764       }
8765       continue;
8766     }
8767 
8768     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
8769     // as any other MemLoc.
8770 
8771     // Promote the value if needed.
8772     // For now, only handle fully promoted and indirect arguments.
8773     if (VA.getLocInfo() == CCValAssign::Indirect) {
8774       // Store the argument in a stack slot and pass its address.
8775       Align StackAlign =
8776           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
8777                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
8778       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8779       // If the original argument was split (e.g. i128), we need
8780       // to store the required parts of it here (and pass just one address).
8781       // Vectors may be partly split to registers and partly to the stack, in
8782       // which case the base address is partly offset and subsequent stores are
8783       // relative to that.
8784       unsigned ArgIndex = Outs[i].OrigArgIndex;
8785       unsigned ArgPartOffset = Outs[i].PartOffset;
8786       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
8787       // Calculate the total size to store. We don't have access to what we're
8788       // actually storing other than performing the loop and collecting the
8789       // info.
8790       SmallVector<std::pair<SDValue, SDValue>> Parts;
8791       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
8792         SDValue PartValue = OutVals[i + 1];
8793         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
8794         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8795         EVT PartVT = PartValue.getValueType();
8796         if (PartVT.isScalableVector())
8797           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
8798         StoredSize += PartVT.getStoreSize();
8799         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8800         Parts.push_back(std::make_pair(PartValue, Offset));
8801         ++i;
8802       }
8803       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8804       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8805       MemOpChains.push_back(
8806           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8807                        MachinePointerInfo::getFixedStack(MF, FI)));
8808       for (const auto &Part : Parts) {
8809         SDValue PartValue = Part.first;
8810         SDValue PartOffset = Part.second;
8811         SDValue Address =
8812             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8813         MemOpChains.push_back(
8814             DAG.getStore(Chain, DL, PartValue, Address,
8815                          MachinePointerInfo::getFixedStack(MF, FI)));
8816       }
8817       ArgValue = SpillSlot;
8818     } else {
8819       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
8820     }
8821 
8822     // Use local copy if it is a byval arg.
8823     if (Flags.isByVal())
8824       ArgValue = ByValArgs[j++];
8825 
8826     if (VA.isRegLoc()) {
8827       // Queue up the argument copies and emit them at the end.
8828       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8829     } else {
8830       assert(VA.isMemLoc() && "Argument not register or memory");
8831       assert(!IsTailCall && "Tail call not allowed if stack is used "
8832                             "for passing parameters");
8833 
8834       // Work out the address of the stack slot.
8835       if (!StackPtr.getNode())
8836         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
8837       SDValue Address =
8838           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8839                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
8840 
8841       // Emit the store.
8842       MemOpChains.push_back(
8843           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8844     }
8845   }
8846 
8847   // Join the stores, which are independent of one another.
8848   if (!MemOpChains.empty())
8849     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8850 
8851   SDValue Glue;
8852 
8853   // Build a sequence of copy-to-reg nodes, chained and glued together.
8854   for (auto &Reg : RegsToPass) {
8855     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8856     Glue = Chain.getValue(1);
8857   }
8858 
  // Validate that none of the argument registers have been marked as
  // reserved; if any have been, report an error. Do the same for the return
  // address if this is not a tail call.
8862   validateCCReservedRegs(RegsToPass, MF);
8863   if (!IsTailCall &&
8864       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
8865     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
8866         MF.getFunction(),
8867         "Return address register required, but has been reserved."});
8868 
8869   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8870   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8871   // split it and then direct call can be matched by PseudoCALL.
8872   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8873     const GlobalValue *GV = S->getGlobal();
8874 
8875     unsigned OpFlags = RISCVII::MO_CALL;
8876     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
8877       OpFlags = RISCVII::MO_PLT;
8878 
8879     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
8880   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8881     unsigned OpFlags = RISCVII::MO_CALL;
8882 
8883     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
8884                                                  nullptr))
8885       OpFlags = RISCVII::MO_PLT;
8886 
8887     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8888   }
8889 
8890   // The first call operand is the chain and the second is the target address.
8891   SmallVector<SDValue, 8> Ops;
8892   Ops.push_back(Chain);
8893   Ops.push_back(Callee);
8894 
8895   // Add argument registers to the end of the list so that they are
8896   // known live into the call.
8897   for (auto &Reg : RegsToPass)
8898     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8899 
8900   if (!IsTailCall) {
8901     // Add a register mask operand representing the call-preserved registers.
8902     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8903     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8904     assert(Mask && "Missing call preserved mask for calling convention");
8905     Ops.push_back(DAG.getRegisterMask(Mask));
8906   }
8907 
8908   // Glue the call to the argument copies, if any.
8909   if (Glue.getNode())
8910     Ops.push_back(Glue);
8911 
8912   // Emit the call.
8913   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8914 
8915   if (IsTailCall) {
8916     MF.getFrameInfo().setHasTailCall();
8917     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
8918   }
8919 
8920   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
8921   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8922   Glue = Chain.getValue(1);
8923 
8924   // Mark the end of the call, which is glued to the call itself.
8925   Chain = DAG.getCALLSEQ_END(Chain,
8926                              DAG.getConstant(NumBytes, DL, PtrVT, true),
8927                              DAG.getConstant(0, DL, PtrVT, true),
8928                              Glue, DL);
8929   Glue = Chain.getValue(1);
8930 
8931   // Assign locations to each value returned by this call.
8932   SmallVector<CCValAssign, 16> RVLocs;
8933   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8934   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
8935 
8936   // Copy all of the result registers out of their specified physreg.
8937   for (auto &VA : RVLocs) {
8938     // Copy the value out
8939     SDValue RetValue =
8940         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8941     // Glue the RetValue to the end of the call sequence
8942     Chain = RetValue.getValue(1);
8943     Glue = RetValue.getValue(2);
8944 
8945     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8946       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
8947       SDValue RetValue2 =
8948           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
8949       Chain = RetValue2.getValue(1);
8950       Glue = RetValue2.getValue(2);
8951       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
8952                              RetValue2);
8953     }
8954 
8955     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
8956 
8957     InVals.push_back(RetValue);
8958   }
8959 
8960   return Chain;
8961 }
8962 
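// CanLowerReturn is queried before lowering the function: it runs the return
// calling convention over the proposed return values and, roughly speaking,
// returning false here causes the generic code to demote the return value to
// an sret-style indirect return instead.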
8963 bool RISCVTargetLowering::CanLowerReturn(
8964     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8965     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
8966   SmallVector<CCValAssign, 16> RVLocs;
8967   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8968 
8969   Optional<unsigned> FirstMaskArgument;
8970   if (Subtarget.hasStdExtV())
8971     FirstMaskArgument = preAssignMask(Outs);
8972 
8973   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8974     MVT VT = Outs[i].VT;
8975     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
8976     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
8977     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
8978                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
8979                  *this, FirstMaskArgument))
8980       return false;
8981   }
8982   return true;
8983 }
8984 
8985 SDValue
8986 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
8987                                  bool IsVarArg,
8988                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
8989                                  const SmallVectorImpl<SDValue> &OutVals,
8990                                  const SDLoc &DL, SelectionDAG &DAG) const {
8991   const MachineFunction &MF = DAG.getMachineFunction();
8992   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
8993 
8994   // Stores the assignment of the return value to a location.
8995   SmallVector<CCValAssign, 16> RVLocs;
8996 
8997   // Info about the registers and stack slot.
8998   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8999                  *DAG.getContext());
9000 
9001   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
9002                     nullptr, CC_RISCV);
9003 
9004   if (CallConv == CallingConv::GHC && !RVLocs.empty())
9005     report_fatal_error("GHC functions return void only");
9006 
9007   SDValue Glue;
9008   SmallVector<SDValue, 4> RetOps(1, Chain);
9009 
9010   // Copy the result values into the output registers.
9011   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
9012     SDValue Val = OutVals[i];
9013     CCValAssign &VA = RVLocs[i];
9014     assert(VA.isRegLoc() && "Can only return in registers!");
9015 
9016     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9017       // Handle returning f64 on RV32D with a soft float ABI.
9018       assert(VA.isRegLoc() && "Expected return via registers");
9019       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
9020                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
9021       SDValue Lo = SplitF64.getValue(0);
9022       SDValue Hi = SplitF64.getValue(1);
9023       Register RegLo = VA.getLocReg();
9024       assert(RegLo < RISCV::X31 && "Invalid register pair");
9025       Register RegHi = RegLo + 1;
9026 
9027       if (STI.isRegisterReservedByUser(RegLo) ||
9028           STI.isRegisterReservedByUser(RegHi))
9029         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
9030             MF.getFunction(),
9031             "Return value register required, but has been reserved."});
9032 
9033       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
9034       Glue = Chain.getValue(1);
9035       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
9036       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
9037       Glue = Chain.getValue(1);
9038       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
9039     } else {
9040       // Handle a 'normal' return.
9041       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
9042       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
9043 
9044       if (STI.isRegisterReservedByUser(VA.getLocReg()))
9045         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
9046             MF.getFunction(),
9047             "Return value register required, but has been reserved."});
9048 
9049       // Guarantee that all emitted copies are stuck together.
9050       Glue = Chain.getValue(1);
9051       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
9052     }
9053   }
9054 
9055   RetOps[0] = Chain; // Update chain.
9056 
9057   // Add the glue node if we have it.
9058   if (Glue.getNode()) {
9059     RetOps.push_back(Glue);
9060   }
9061 
9062   unsigned RetOpc = RISCVISD::RET_FLAG;
9063   // Interrupt service routines use different return instructions.
9064   const Function &Func = DAG.getMachineFunction().getFunction();
9065   if (Func.hasFnAttribute("interrupt")) {
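    // As a rough example of how the attribute maps to the return opcodes
    // chosen below (using clang's spelling of the attribute):
    //   __attribute__((interrupt("supervisor"))) void isr(void);
    // returns with the sret instruction, while the default machine-mode
    // variant returns with mret.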
9066     if (!Func.getReturnType()->isVoidTy())
9067       report_fatal_error(
9068           "Functions with the interrupt attribute must have void return type!");
9069 
9070     MachineFunction &MF = DAG.getMachineFunction();
9071     StringRef Kind =
9072       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
9073 
9074     if (Kind == "user")
9075       RetOpc = RISCVISD::URET_FLAG;
9076     else if (Kind == "supervisor")
9077       RetOpc = RISCVISD::SRET_FLAG;
9078     else
9079       RetOpc = RISCVISD::MRET_FLAG;
9080   }
9081 
9082   return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
9083 }
9084 
9085 void RISCVTargetLowering::validateCCReservedRegs(
9086     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
9087     MachineFunction &MF) const {
9088   const Function &F = MF.getFunction();
9089   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
9090 
9091   if (llvm::any_of(Regs, [&STI](auto Reg) {
9092         return STI.isRegisterReservedByUser(Reg.first);
9093       }))
9094     F.getContext().diagnose(DiagnosticInfoUnsupported{
9095         F, "Argument register required, but has been reserved."});
9096 }
9097 
9098 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
9099   return CI->isTailCall();
9100 }
9101 
9102 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
9103 #define NODE_NAME_CASE(NODE)                                                   \
9104   case RISCVISD::NODE:                                                         \
9105     return "RISCVISD::" #NODE;
9106   // clang-format off
9107   switch ((RISCVISD::NodeType)Opcode) {
9108   case RISCVISD::FIRST_NUMBER:
9109     break;
9110   NODE_NAME_CASE(RET_FLAG)
9111   NODE_NAME_CASE(URET_FLAG)
9112   NODE_NAME_CASE(SRET_FLAG)
9113   NODE_NAME_CASE(MRET_FLAG)
9114   NODE_NAME_CASE(CALL)
9115   NODE_NAME_CASE(SELECT_CC)
9116   NODE_NAME_CASE(BR_CC)
9117   NODE_NAME_CASE(BuildPairF64)
9118   NODE_NAME_CASE(SplitF64)
9119   NODE_NAME_CASE(TAIL)
9120   NODE_NAME_CASE(MULHSU)
9121   NODE_NAME_CASE(SLLW)
9122   NODE_NAME_CASE(SRAW)
9123   NODE_NAME_CASE(SRLW)
9124   NODE_NAME_CASE(DIVW)
9125   NODE_NAME_CASE(DIVUW)
9126   NODE_NAME_CASE(REMUW)
9127   NODE_NAME_CASE(ROLW)
9128   NODE_NAME_CASE(RORW)
9129   NODE_NAME_CASE(CLZW)
9130   NODE_NAME_CASE(CTZW)
9131   NODE_NAME_CASE(FSLW)
9132   NODE_NAME_CASE(FSRW)
9133   NODE_NAME_CASE(FSL)
9134   NODE_NAME_CASE(FSR)
9135   NODE_NAME_CASE(FMV_H_X)
9136   NODE_NAME_CASE(FMV_X_ANYEXTH)
9137   NODE_NAME_CASE(FMV_W_X_RV64)
9138   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
9139   NODE_NAME_CASE(FCVT_X_RTZ)
9140   NODE_NAME_CASE(FCVT_XU_RTZ)
9141   NODE_NAME_CASE(FCVT_W_RTZ_RV64)
9142   NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
9143   NODE_NAME_CASE(READ_CYCLE_WIDE)
9144   NODE_NAME_CASE(GREV)
9145   NODE_NAME_CASE(GREVW)
9146   NODE_NAME_CASE(GORC)
9147   NODE_NAME_CASE(GORCW)
9148   NODE_NAME_CASE(SHFL)
9149   NODE_NAME_CASE(SHFLW)
9150   NODE_NAME_CASE(UNSHFL)
9151   NODE_NAME_CASE(UNSHFLW)
9152   NODE_NAME_CASE(BCOMPRESS)
9153   NODE_NAME_CASE(BCOMPRESSW)
9154   NODE_NAME_CASE(BDECOMPRESS)
9155   NODE_NAME_CASE(BDECOMPRESSW)
9156   NODE_NAME_CASE(VMV_V_X_VL)
9157   NODE_NAME_CASE(VFMV_V_F_VL)
9158   NODE_NAME_CASE(VMV_X_S)
9159   NODE_NAME_CASE(VMV_S_X_VL)
9160   NODE_NAME_CASE(VFMV_S_F_VL)
9161   NODE_NAME_CASE(SPLAT_VECTOR_I64)
9162   NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
9163   NODE_NAME_CASE(READ_VLENB)
9164   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
9165   NODE_NAME_CASE(VSLIDEUP_VL)
9166   NODE_NAME_CASE(VSLIDE1UP_VL)
9167   NODE_NAME_CASE(VSLIDEDOWN_VL)
9168   NODE_NAME_CASE(VSLIDE1DOWN_VL)
9169   NODE_NAME_CASE(VID_VL)
9170   NODE_NAME_CASE(VFNCVT_ROD_VL)
9171   NODE_NAME_CASE(VECREDUCE_ADD_VL)
9172   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
9173   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
9174   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
9175   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
9176   NODE_NAME_CASE(VECREDUCE_AND_VL)
9177   NODE_NAME_CASE(VECREDUCE_OR_VL)
9178   NODE_NAME_CASE(VECREDUCE_XOR_VL)
9179   NODE_NAME_CASE(VECREDUCE_FADD_VL)
9180   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
9181   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
9182   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
9183   NODE_NAME_CASE(ADD_VL)
9184   NODE_NAME_CASE(AND_VL)
9185   NODE_NAME_CASE(MUL_VL)
9186   NODE_NAME_CASE(OR_VL)
9187   NODE_NAME_CASE(SDIV_VL)
9188   NODE_NAME_CASE(SHL_VL)
9189   NODE_NAME_CASE(SREM_VL)
9190   NODE_NAME_CASE(SRA_VL)
9191   NODE_NAME_CASE(SRL_VL)
9192   NODE_NAME_CASE(SUB_VL)
9193   NODE_NAME_CASE(UDIV_VL)
9194   NODE_NAME_CASE(UREM_VL)
9195   NODE_NAME_CASE(XOR_VL)
9196   NODE_NAME_CASE(SADDSAT_VL)
9197   NODE_NAME_CASE(UADDSAT_VL)
9198   NODE_NAME_CASE(SSUBSAT_VL)
9199   NODE_NAME_CASE(USUBSAT_VL)
9200   NODE_NAME_CASE(FADD_VL)
9201   NODE_NAME_CASE(FSUB_VL)
9202   NODE_NAME_CASE(FMUL_VL)
9203   NODE_NAME_CASE(FDIV_VL)
9204   NODE_NAME_CASE(FNEG_VL)
9205   NODE_NAME_CASE(FABS_VL)
9206   NODE_NAME_CASE(FSQRT_VL)
9207   NODE_NAME_CASE(FMA_VL)
9208   NODE_NAME_CASE(FCOPYSIGN_VL)
9209   NODE_NAME_CASE(SMIN_VL)
9210   NODE_NAME_CASE(SMAX_VL)
9211   NODE_NAME_CASE(UMIN_VL)
9212   NODE_NAME_CASE(UMAX_VL)
9213   NODE_NAME_CASE(FMINNUM_VL)
9214   NODE_NAME_CASE(FMAXNUM_VL)
9215   NODE_NAME_CASE(MULHS_VL)
9216   NODE_NAME_CASE(MULHU_VL)
9217   NODE_NAME_CASE(FP_TO_SINT_VL)
9218   NODE_NAME_CASE(FP_TO_UINT_VL)
9219   NODE_NAME_CASE(SINT_TO_FP_VL)
9220   NODE_NAME_CASE(UINT_TO_FP_VL)
9221   NODE_NAME_CASE(FP_EXTEND_VL)
9222   NODE_NAME_CASE(FP_ROUND_VL)
9223   NODE_NAME_CASE(VWMUL_VL)
9224   NODE_NAME_CASE(VWMULU_VL)
9225   NODE_NAME_CASE(SETCC_VL)
9226   NODE_NAME_CASE(VSELECT_VL)
9227   NODE_NAME_CASE(VMAND_VL)
9228   NODE_NAME_CASE(VMOR_VL)
9229   NODE_NAME_CASE(VMXOR_VL)
9230   NODE_NAME_CASE(VMCLR_VL)
9231   NODE_NAME_CASE(VMSET_VL)
9232   NODE_NAME_CASE(VRGATHER_VX_VL)
9233   NODE_NAME_CASE(VRGATHER_VV_VL)
9234   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
9235   NODE_NAME_CASE(VSEXT_VL)
9236   NODE_NAME_CASE(VZEXT_VL)
9237   NODE_NAME_CASE(VPOPC_VL)
9238   NODE_NAME_CASE(VLE_VL)
9239   NODE_NAME_CASE(VSE_VL)
9240   NODE_NAME_CASE(READ_CSR)
9241   NODE_NAME_CASE(WRITE_CSR)
9242   NODE_NAME_CASE(SWAP_CSR)
9243   }
9244   // clang-format on
9245   return nullptr;
9246 #undef NODE_NAME_CASE
9247 }
9248 
9249 /// getConstraintType - Given a constraint letter, return the type of
9250 /// constraint it is for this target.
9251 RISCVTargetLowering::ConstraintType
9252 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
9253   if (Constraint.size() == 1) {
9254     switch (Constraint[0]) {
9255     default:
9256       break;
9257     case 'f':
9258       return C_RegisterClass;
9259     case 'I':
9260     case 'J':
9261     case 'K':
9262       return C_Immediate;
9263     case 'A':
9264       return C_Memory;
9265     case 'S': // A symbolic address
9266       return C_Other;
9267     }
9268   } else {
9269     if (Constraint == "vr" || Constraint == "vm")
9270       return C_RegisterClass;
9271   }
9272   return TargetLowering::getConstraintType(Constraint);
9273 }
9274 
9275 std::pair<unsigned, const TargetRegisterClass *>
9276 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
9277                                                   StringRef Constraint,
9278                                                   MVT VT) const {
9279   // First, see if this is a constraint that directly corresponds to a
9280   // RISCV register class.
9281   if (Constraint.size() == 1) {
9282     switch (Constraint[0]) {
9283     case 'r':
9284       return std::make_pair(0U, &RISCV::GPRRegClass);
9285     case 'f':
9286       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
9287         return std::make_pair(0U, &RISCV::FPR16RegClass);
9288       if (Subtarget.hasStdExtF() && VT == MVT::f32)
9289         return std::make_pair(0U, &RISCV::FPR32RegClass);
9290       if (Subtarget.hasStdExtD() && VT == MVT::f64)
9291         return std::make_pair(0U, &RISCV::FPR64RegClass);
9292       break;
9293     default:
9294       break;
9295     }
9296   } else {
9297     if (Constraint == "vr") {
9298       for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
9299                              &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
9300         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
9301           return std::make_pair(0U, RC);
9302       }
9303     } else if (Constraint == "vm") {
9304       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
9305         return std::make_pair(0U, &RISCV::VMRegClass);
9306     }
9307   }
9308 
9309   // Clang will correctly decode the usage of register name aliases into their
9310   // official names. However, other frontends like `rustc` do not. This allows
9311   // users of these frontends to use the ABI names for registers in LLVM-style
9312   // register constraints.
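  // For example, with this table a "{a0}" constraint resolves to X10, and
  // "{fp}" resolves to X8, the same register "{s0}" names.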
9313   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
9314                                .Case("{zero}", RISCV::X0)
9315                                .Case("{ra}", RISCV::X1)
9316                                .Case("{sp}", RISCV::X2)
9317                                .Case("{gp}", RISCV::X3)
9318                                .Case("{tp}", RISCV::X4)
9319                                .Case("{t0}", RISCV::X5)
9320                                .Case("{t1}", RISCV::X6)
9321                                .Case("{t2}", RISCV::X7)
9322                                .Cases("{s0}", "{fp}", RISCV::X8)
9323                                .Case("{s1}", RISCV::X9)
9324                                .Case("{a0}", RISCV::X10)
9325                                .Case("{a1}", RISCV::X11)
9326                                .Case("{a2}", RISCV::X12)
9327                                .Case("{a3}", RISCV::X13)
9328                                .Case("{a4}", RISCV::X14)
9329                                .Case("{a5}", RISCV::X15)
9330                                .Case("{a6}", RISCV::X16)
9331                                .Case("{a7}", RISCV::X17)
9332                                .Case("{s2}", RISCV::X18)
9333                                .Case("{s3}", RISCV::X19)
9334                                .Case("{s4}", RISCV::X20)
9335                                .Case("{s5}", RISCV::X21)
9336                                .Case("{s6}", RISCV::X22)
9337                                .Case("{s7}", RISCV::X23)
9338                                .Case("{s8}", RISCV::X24)
9339                                .Case("{s9}", RISCV::X25)
9340                                .Case("{s10}", RISCV::X26)
9341                                .Case("{s11}", RISCV::X27)
9342                                .Case("{t3}", RISCV::X28)
9343                                .Case("{t4}", RISCV::X29)
9344                                .Case("{t5}", RISCV::X30)
9345                                .Case("{t6}", RISCV::X31)
9346                                .Default(RISCV::NoRegister);
9347   if (XRegFromAlias != RISCV::NoRegister)
9348     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
9349 
  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record rather than the AsmName to choose registers for inline asm
  // constraints, and we also want to match those names to the widest
  // floating-point register type available, so select floating-point registers
  // manually here.
  //
  // The second name in each case below is the ABI name of the register, so
  // that frontends can also use the ABI names in register constraint lists.
9357   if (Subtarget.hasStdExtF()) {
9358     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
9359                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
9360                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
9361                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
9362                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
9363                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
9364                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
9365                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
9366                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
9367                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
9368                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
9369                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
9370                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
9371                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
9372                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
9373                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
9374                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
9375                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
9376                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
9377                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
9378                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
9379                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
9380                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
9381                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
9382                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
9383                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
9384                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
9385                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
9386                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
9387                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
9388                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
9389                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
9390                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
9391                         .Default(RISCV::NoRegister);
9392     if (FReg != RISCV::NoRegister) {
9393       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
9394       if (Subtarget.hasStdExtD()) {
9395         unsigned RegNo = FReg - RISCV::F0_F;
9396         unsigned DReg = RISCV::F0_D + RegNo;
9397         return std::make_pair(DReg, &RISCV::FPR64RegClass);
9398       }
9399       return std::make_pair(FReg, &RISCV::FPR32RegClass);
9400     }
9401   }
9402 
9403   if (Subtarget.hasStdExtV()) {
9404     Register VReg = StringSwitch<Register>(Constraint.lower())
9405                         .Case("{v0}", RISCV::V0)
9406                         .Case("{v1}", RISCV::V1)
9407                         .Case("{v2}", RISCV::V2)
9408                         .Case("{v3}", RISCV::V3)
9409                         .Case("{v4}", RISCV::V4)
9410                         .Case("{v5}", RISCV::V5)
9411                         .Case("{v6}", RISCV::V6)
9412                         .Case("{v7}", RISCV::V7)
9413                         .Case("{v8}", RISCV::V8)
9414                         .Case("{v9}", RISCV::V9)
9415                         .Case("{v10}", RISCV::V10)
9416                         .Case("{v11}", RISCV::V11)
9417                         .Case("{v12}", RISCV::V12)
9418                         .Case("{v13}", RISCV::V13)
9419                         .Case("{v14}", RISCV::V14)
9420                         .Case("{v15}", RISCV::V15)
9421                         .Case("{v16}", RISCV::V16)
9422                         .Case("{v17}", RISCV::V17)
9423                         .Case("{v18}", RISCV::V18)
9424                         .Case("{v19}", RISCV::V19)
9425                         .Case("{v20}", RISCV::V20)
9426                         .Case("{v21}", RISCV::V21)
9427                         .Case("{v22}", RISCV::V22)
9428                         .Case("{v23}", RISCV::V23)
9429                         .Case("{v24}", RISCV::V24)
9430                         .Case("{v25}", RISCV::V25)
9431                         .Case("{v26}", RISCV::V26)
9432                         .Case("{v27}", RISCV::V27)
9433                         .Case("{v28}", RISCV::V28)
9434                         .Case("{v29}", RISCV::V29)
9435                         .Case("{v30}", RISCV::V30)
9436                         .Case("{v31}", RISCV::V31)
9437                         .Default(RISCV::NoRegister);
9438     if (VReg != RISCV::NoRegister) {
9439       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
9440         return std::make_pair(VReg, &RISCV::VMRegClass);
9441       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
9442         return std::make_pair(VReg, &RISCV::VRRegClass);
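      // For LMUL > 1 value types the named register is only the first vector
      // register of a group, so map it to the register group it starts (e.g.
      // v8 used with an LMUL=2 type becomes the V8M2 group) before returning.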
9443       for (const auto *RC :
9444            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
9445         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
9446           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
9447           return std::make_pair(VReg, RC);
9448         }
9449       }
9450     }
9451   }
9452 
9453   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9454 }
9455 
9456 unsigned
9457 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
9458   // Currently only support length 1 constraints.
9459   if (ConstraintCode.size() == 1) {
9460     switch (ConstraintCode[0]) {
9461     case 'A':
9462       return InlineAsm::Constraint_A;
9463     default:
9464       break;
9465     }
9466   }
9467 
9468   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
9469 }
9470 
9471 void RISCVTargetLowering::LowerAsmOperandForConstraint(
9472     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
9473     SelectionDAG &DAG) const {
9474   // Currently only support length 1 constraints.
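  // As a usage sketch, the immediate constraints handled below correspond to
  // inline asm such as:
  //   asm volatile("addi %0, %1, %2" : "=r"(Res) : "r"(Src), "I"(42));
  // where 'I' accepts a 12-bit signed immediate, 'J' accepts only zero, and
  // 'K' accepts a 5-bit unsigned immediate.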
9475   if (Constraint.length() == 1) {
9476     switch (Constraint[0]) {
9477     case 'I':
9478       // Validate & create a 12-bit signed immediate operand.
9479       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9480         uint64_t CVal = C->getSExtValue();
9481         if (isInt<12>(CVal))
9482           Ops.push_back(
9483               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
9484       }
9485       return;
9486     case 'J':
9487       // Validate & create an integer zero operand.
9488       if (auto *C = dyn_cast<ConstantSDNode>(Op))
9489         if (C->getZExtValue() == 0)
9490           Ops.push_back(
9491               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
9492       return;
9493     case 'K':
9494       // Validate & create a 5-bit unsigned immediate operand.
9495       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9496         uint64_t CVal = C->getZExtValue();
9497         if (isUInt<5>(CVal))
9498           Ops.push_back(
9499               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
9500       }
9501       return;
9502     case 'S':
9503       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
9504         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
9505                                                  GA->getValueType(0)));
9506       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
9507         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
9508                                                 BA->getValueType(0)));
9509       }
9510       return;
9511     default:
9512       break;
9513     }
9514   }
9515   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9516 }
9517 
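// A rough summary of the fence-based mapping implemented by the two hooks
// below: a seq_cst atomic load gets a leading fence with seq_cst semantics and
// a trailing acquire fence, while a release-or-stronger atomic store gets a
// leading release fence and no trailing fence.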
9518 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
9519                                                    Instruction *Inst,
9520                                                    AtomicOrdering Ord) const {
9521   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
9522     return Builder.CreateFence(Ord);
9523   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
9524     return Builder.CreateFence(AtomicOrdering::Release);
9525   return nullptr;
9526 }
9527 
9528 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
9529                                                     Instruction *Inst,
9530                                                     AtomicOrdering Ord) const {
9531   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
9532     return Builder.CreateFence(AtomicOrdering::Acquire);
9533   return nullptr;
9534 }
9535 
9536 TargetLowering::AtomicExpansionKind
9537 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
9538   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
9539   // point operations can't be used in an lr/sc sequence without breaking the
9540   // forward-progress guarantee.
9541   if (AI->isFloatingPointOperation())
9542     return AtomicExpansionKind::CmpXChg;
9543 
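  // 8- and 16-bit atomicrmw operations have no native AMO instruction, so ask
  // AtomicExpandPass to expand them via the masked intrinsics: roughly, an
  // LR/SC loop on the containing aligned 32-bit word driven by
  // emitMaskedAtomicRMWIntrinsic below.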
9544   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9545   if (Size == 8 || Size == 16)
9546     return AtomicExpansionKind::MaskedIntrinsic;
9547   return AtomicExpansionKind::None;
9548 }
9549 
9550 static Intrinsic::ID
9551 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
9552   if (XLen == 32) {
9553     switch (BinOp) {
9554     default:
9555       llvm_unreachable("Unexpected AtomicRMW BinOp");
9556     case AtomicRMWInst::Xchg:
9557       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
9558     case AtomicRMWInst::Add:
9559       return Intrinsic::riscv_masked_atomicrmw_add_i32;
9560     case AtomicRMWInst::Sub:
9561       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
9562     case AtomicRMWInst::Nand:
9563       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
9564     case AtomicRMWInst::Max:
9565       return Intrinsic::riscv_masked_atomicrmw_max_i32;
9566     case AtomicRMWInst::Min:
9567       return Intrinsic::riscv_masked_atomicrmw_min_i32;
9568     case AtomicRMWInst::UMax:
9569       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
9570     case AtomicRMWInst::UMin:
9571       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
9572     }
9573   }
9574 
9575   if (XLen == 64) {
9576     switch (BinOp) {
9577     default:
9578       llvm_unreachable("Unexpected AtomicRMW BinOp");
9579     case AtomicRMWInst::Xchg:
9580       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
9581     case AtomicRMWInst::Add:
9582       return Intrinsic::riscv_masked_atomicrmw_add_i64;
9583     case AtomicRMWInst::Sub:
9584       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
9585     case AtomicRMWInst::Nand:
9586       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
9587     case AtomicRMWInst::Max:
9588       return Intrinsic::riscv_masked_atomicrmw_max_i64;
9589     case AtomicRMWInst::Min:
9590       return Intrinsic::riscv_masked_atomicrmw_min_i64;
9591     case AtomicRMWInst::UMax:
9592       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
9593     case AtomicRMWInst::UMin:
9594       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
9595     }
9596   }
9597 
  llvm_unreachable("Unexpected XLen");
9599 }
9600 
9601 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
9602     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9603     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9604   unsigned XLen = Subtarget.getXLen();
9605   Value *Ordering =
9606       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
9607   Type *Tys[] = {AlignedAddr->getType()};
9608   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
9609       AI->getModule(),
9610       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
9611 
9612   if (XLen == 64) {
9613     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9614     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9615     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9616   }
9617 
9618   Value *Result;
9619 
9620   // Must pass the shift amount needed to sign extend the loaded value prior
9621   // to performing a signed comparison for min/max. ShiftAmt is the number of
9622   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
9623   // is the number of bits to left+right shift the value in order to
9624   // sign-extend.
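  // As a worked example, assuming RV32 and an i16 value sitting in the low
  // half of the aligned word: ShiftAmt is 0 and ValWidth is 16, so SextShamt
  // is 32 - 16 - 0 = 16, i.e. shift left by 16 and then arithmetic-shift right
  // by 16 to sign-extend the halfword before the signed min/max comparison.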
9625   if (AI->getOperation() == AtomicRMWInst::Min ||
9626       AI->getOperation() == AtomicRMWInst::Max) {
9627     const DataLayout &DL = AI->getModule()->getDataLayout();
9628     unsigned ValWidth =
9629         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9630     Value *SextShamt =
9631         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
9632     Result = Builder.CreateCall(LrwOpScwLoop,
9633                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9634   } else {
9635     Result =
9636         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9637   }
9638 
9639   if (XLen == 64)
9640     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9641   return Result;
9642 }
9643 
9644 TargetLowering::AtomicExpansionKind
9645 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
9646     AtomicCmpXchgInst *CI) const {
9647   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
9648   if (Size == 8 || Size == 16)
9649     return AtomicExpansionKind::MaskedIntrinsic;
9650   return AtomicExpansionKind::None;
9651 }
9652 
9653 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9654     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9655     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9656   unsigned XLen = Subtarget.getXLen();
9657   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
9658   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
9659   if (XLen == 64) {
9660     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9661     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9662     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9663     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
9664   }
9665   Type *Tys[] = {AlignedAddr->getType()};
9666   Function *MaskedCmpXchg =
9667       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
9668   Value *Result = Builder.CreateCall(
9669       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
9670   if (XLen == 64)
9671     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9672   return Result;
9673 }
9674 
9675 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
9676   return false;
9677 }
9678 
9679 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
9680                                                      EVT VT) const {
9681   VT = VT.getScalarType();
9682 
9683   if (!VT.isSimple())
9684     return false;
9685 
9686   switch (VT.getSimpleVT().SimpleTy) {
9687   case MVT::f16:
9688     return Subtarget.hasStdExtZfh();
9689   case MVT::f32:
9690     return Subtarget.hasStdExtF();
9691   case MVT::f64:
9692     return Subtarget.hasStdExtD();
9693   default:
9694     break;
9695   }
9696 
9697   return false;
9698 }
9699 
9700 Register RISCVTargetLowering::getExceptionPointerRegister(
9701     const Constant *PersonalityFn) const {
9702   return RISCV::X10;
9703 }
9704 
9705 Register RISCVTargetLowering::getExceptionSelectorRegister(
9706     const Constant *PersonalityFn) const {
9707   return RISCV::X11;
9708 }
9709 
9710 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extension when a libcall argument
  // or return value has type f32 under the LP64 ABI.
9713   RISCVABI::ABI ABI = Subtarget.getTargetABI();
9714   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
9715     return false;
9716 
9717   return true;
9718 }
9719 
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
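  // On RV64 the calling convention passes 32-bit integer arguments and results
  // sign-extended to 64 bits (matching how `int` is handled), so request sign
  // extension for i32 libcall values regardless of their signedness.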
9721   if (Subtarget.is64Bit() && Type == MVT::i32)
9722     return true;
9723 
9724   return IsSigned;
9725 }
9726 
9727 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
9728                                                  SDValue C) const {
9729   // Check integral scalar types.
9730   if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
9733     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
9734       return false;
9735     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9736       // Break the MUL to a SLLI and an ADD/SUB.
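      // For example, x * 9 can become (x << 3) + x and x * 7 can become
      // (x << 3) - x, so returning true lets the DAG combiner use a shift and
      // an add/sub instead of materialising the constant for a MUL.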
9737       const APInt &Imm = ConstNode->getAPIntValue();
9738       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9739           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9740         return true;
9741       // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
9742       if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
9743           ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9744            (Imm - 8).isPowerOf2()))
9745         return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
9748       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
9749         return false;
9750       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
9751       // a pair of LUI/ADDI.
9752       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
9753         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
9754         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
9755             (1 - ImmS).isPowerOf2())
          return true;
9757       }
9758     }
9759   }
9760 
9761   return false;
9762 }
9763 
9764 bool RISCVTargetLowering::isMulAddWithConstProfitable(
9765     const SDValue &AddNode, const SDValue &ConstNode) const {
9766   // Let the DAGCombiner decide for vectors.
9767   EVT VT = AddNode.getValueType();
9768   if (VT.isVector())
9769     return true;
9770 
9771   // Let the DAGCombiner decide for larger types.
9772   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
9773     return true;
9774 
9775   // It is worse if c1 is simm12 while c1*c2 is not.
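  // For example, with c1 = 1 and c2 = 3000, rewriting (x + 1) * 3000 as
  // x * 3000 + 3000 would replace an ADDI of 1 with an ADD of a separately
  // materialised 3000, so the original form is kept.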
9776   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
9777   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
9778   const APInt &C1 = C1Node->getAPIntValue();
9779   const APInt &C2 = C2Node->getAPIntValue();
9780   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
9781     return false;
9782 
9783   // Default to true and let the DAGCombiner decide.
9784   return true;
9785 }
9786 
9787 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
9788     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
9789     bool *Fast) const {
9790   if (!VT.isVector())
9791     return false;
9792 
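  // For vectors only element-wise alignment is required; e.g. a 16 x i8 access
  // with alignment 1 is allowed (and reported as fast), whereas a 4 x i32
  // access with alignment 1 is rejected.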
9793   EVT ElemVT = VT.getVectorElementType();
9794   if (Alignment >= ElemVT.getStoreSize()) {
9795     if (Fast)
9796       *Fast = true;
9797     return true;
9798   }
9799 
9800   return false;
9801 }
9802 
9803 bool RISCVTargetLowering::splitValueIntoRegisterParts(
9804     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9805     unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
9806   bool IsABIRegCopy = CC.hasValue();
9807   EVT ValueVT = Val.getValueType();
9808   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
9809     // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
9810     // and cast to f32.
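    // This mirrors the NaN-boxing convention for keeping a narrower FP value
    // in a wider FP register: with the upper 16 bits all ones, the resulting
    // f32 bit pattern is a NaN whose low bits carry the original f16 value.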
9811     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9812     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9813     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9814                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9815     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9816     Parts[0] = Val;
9817     return true;
9818   }
9819 
9820   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
9821     LLVMContext &Context = *DAG.getContext();
9822     EVT ValueEltVT = ValueVT.getVectorElementType();
9823     EVT PartEltVT = PartVT.getVectorElementType();
9824     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
9825     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
9826     if (PartVTBitSize % ValueVTBitSize == 0) {
9827       // If the element types are different, bitcast to the same element type of
9828       // PartVT first.
9829       if (ValueEltVT != PartEltVT) {
9830         unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
9832         EVT SameEltTypeVT =
9833             EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
9834         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
9835       }
9836       Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
9837                         Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
9838       Parts[0] = Val;
9839       return true;
9840     }
9841   }
9842   return false;
9843 }
9844 
9845 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
9846     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9847     MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
9848   bool IsABIRegCopy = CC.hasValue();
9849   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
9850     SDValue Val = Parts[0];
9851 
9852     // Cast the f32 to i32, truncate to i16, and cast back to f16.
9853     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9854     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9855     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
9856     return Val;
9857   }
9858 
9859   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
9860     LLVMContext &Context = *DAG.getContext();
9861     SDValue Val = Parts[0];
9862     EVT ValueEltVT = ValueVT.getVectorElementType();
9863     EVT PartEltVT = PartVT.getVectorElementType();
9864     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
9865     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
9866     if (PartVTBitSize % ValueVTBitSize == 0) {
9867       EVT SameEltTypeVT = ValueVT;
9868       // If the element types are different, convert it to the same element type
9869       // of PartVT.
9870       if (ValueEltVT != PartEltVT) {
9871         unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
9873         SameEltTypeVT =
9874             EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
9875       }
9876       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
9877                         DAG.getConstant(0, DL, Subtarget.getXLenVT()));
9878       if (ValueEltVT != PartEltVT)
9879         Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9880       return Val;
9881     }
9882   }
9883   return SDValue();
9884 }
9885 
9886 #define GET_REGISTER_MATCHER
9887 #include "RISCVGenAsmMatcher.inc"
9888 
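// Resolves register names for constructs that name a physical register
// directly, e.g. the llvm.read_register and llvm.write_register intrinsics.
// The register must be reserved (either by default, like the stack pointer, or
// explicitly by the user) for the lookup to succeed.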
9889 Register
9890 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
9891                                        const MachineFunction &MF) const {
9892   Register Reg = MatchRegisterAltName(RegName);
9893   if (Reg == RISCV::NoRegister)
9894     Reg = MatchRegisterName(RegName);
9895   if (Reg == RISCV::NoRegister)
9896     report_fatal_error(
9897         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
9898   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9899   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
9900     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9901                              StringRef(RegName) + "\"."));
9902   return Reg;
9903 }
9904 
9905 namespace llvm {
9906 namespace RISCVVIntrinsicsTable {
9907 
9908 #define GET_RISCVVIntrinsicsTable_IMPL
9909 #include "RISCVGenSearchableTables.inc"
9910 
9911 } // namespace RISCVVIntrinsicsTable
9912 
9913 } // namespace llvm
9914