1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
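  // Scalable vector types legalised below when the V extension (and, for the
  // floating-point types, the matching scalar FP extension) is enabled.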
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
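    // Assign each scalable vector type to the vector register class matching
    // its LMUL: a single VR register for LMUL <= 1 and the VRM2/VRM4/VRM8
    // register groups for LMUL = 2/4/8.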
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
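      // Fixed-length vectors use the vector register class for the LMUL the
      // subtarget assigns to each such type.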
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147         const TargetRegisterClass *RC;
148         if (LMul == 1)
149           RC = &RISCV::VRRegClass;
150         else if (LMul == 2)
151           RC = &RISCV::VRM2RegClass;
152         else if (LMul == 4)
153           RC = &RISCV::VRM4RegClass;
154         else if (LMul == 8)
155           RC = &RISCV::VRM8RegClass;
156         else
157           llvm_unreachable("Unexpected LMul!");
158 
159         addRegisterClass(VT, RC);
160       };
161       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162         if (useRVVForFixedLengthVectorVT(VT))
163           addRegClassForFixedVectors(VT);
164 
165       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166         if (useRVVForFixedLengthVectorVT(VT))
167           addRegClassForFixedVectors(VT);
168     }
169   }
170 
171   // Compute derived properties from the register classes.
172   computeRegisterProperties(STI.getRegisterInfo());
173 
174   setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
176   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179   // TODO: add all necessary setOperationAction calls.
180   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
181 
182   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
183   setOperationAction(ISD::BR_CC, XLenVT, Expand);
184   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
185 
186   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
187   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
188 
189   setOperationAction(ISD::VASTART, MVT::Other, Custom);
190   setOperationAction(ISD::VAARG, MVT::Other, Expand);
191   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
192   setOperationAction(ISD::VAEND, MVT::Other, Expand);
193 
194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
195   if (!Subtarget.hasStdExtZbb()) {
196     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
197     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
198   }
199 
200   if (Subtarget.is64Bit()) {
201     setOperationAction(ISD::ADD, MVT::i32, Custom);
202     setOperationAction(ISD::SUB, MVT::i32, Custom);
203     setOperationAction(ISD::SHL, MVT::i32, Custom);
204     setOperationAction(ISD::SRA, MVT::i32, Custom);
205     setOperationAction(ISD::SRL, MVT::i32, Custom);
206   }
207 
208   if (!Subtarget.hasStdExtM()) {
209     setOperationAction(ISD::MUL, XLenVT, Expand);
210     setOperationAction(ISD::MULHS, XLenVT, Expand);
211     setOperationAction(ISD::MULHU, XLenVT, Expand);
212     setOperationAction(ISD::SDIV, XLenVT, Expand);
213     setOperationAction(ISD::UDIV, XLenVT, Expand);
214     setOperationAction(ISD::SREM, XLenVT, Expand);
215     setOperationAction(ISD::UREM, XLenVT, Expand);
216   }
217 
218   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
219     setOperationAction(ISD::MUL, MVT::i32, Custom);
220 
221     setOperationAction(ISD::SDIV, MVT::i8, Custom);
222     setOperationAction(ISD::UDIV, MVT::i8, Custom);
223     setOperationAction(ISD::UREM, MVT::i8, Custom);
224     setOperationAction(ISD::SDIV, MVT::i16, Custom);
225     setOperationAction(ISD::UDIV, MVT::i16, Custom);
226     setOperationAction(ISD::UREM, MVT::i16, Custom);
227     setOperationAction(ISD::SDIV, MVT::i32, Custom);
228     setOperationAction(ISD::UDIV, MVT::i32, Custom);
229     setOperationAction(ISD::UREM, MVT::i32, Custom);
230   }
231 
232   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
234   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
235   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
236 
237   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
239   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
240 
241   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
242     if (Subtarget.is64Bit()) {
243       setOperationAction(ISD::ROTL, MVT::i32, Custom);
244       setOperationAction(ISD::ROTR, MVT::i32, Custom);
245     }
246   } else {
247     setOperationAction(ISD::ROTL, XLenVT, Expand);
248     setOperationAction(ISD::ROTR, XLenVT, Expand);
249   }
250 
251   if (Subtarget.hasStdExtZbp()) {
252     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
253     // more combining.
254     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
255     setOperationAction(ISD::BSWAP, XLenVT, Custom);
256 
257     if (Subtarget.is64Bit()) {
258       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
259       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
260     }
261   } else {
262     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
263     // pattern match it directly in isel.
264     setOperationAction(ISD::BSWAP, XLenVT,
265                        Subtarget.hasStdExtZbb() ? Legal : Expand);
266   }
267 
268   if (Subtarget.hasStdExtZbb()) {
269     setOperationAction(ISD::SMIN, XLenVT, Legal);
270     setOperationAction(ISD::SMAX, XLenVT, Legal);
271     setOperationAction(ISD::UMIN, XLenVT, Legal);
272     setOperationAction(ISD::UMAX, XLenVT, Legal);
273   } else {
274     setOperationAction(ISD::CTTZ, XLenVT, Expand);
275     setOperationAction(ISD::CTLZ, XLenVT, Expand);
276     setOperationAction(ISD::CTPOP, XLenVT, Expand);
277   }
278 
279   if (Subtarget.hasStdExtZbt()) {
280     setOperationAction(ISD::FSHL, XLenVT, Custom);
281     setOperationAction(ISD::FSHR, XLenVT, Custom);
282     setOperationAction(ISD::SELECT, XLenVT, Legal);
283 
284     if (Subtarget.is64Bit()) {
285       setOperationAction(ISD::FSHL, MVT::i32, Custom);
286       setOperationAction(ISD::FSHR, MVT::i32, Custom);
287     }
288   } else {
289     setOperationAction(ISD::SELECT, XLenVT, Custom);
290   }
291 
292   ISD::CondCode FPCCToExpand[] = {
293       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
294       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
295       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
296 
297   ISD::NodeType FPOpToExpand[] = {
298       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
299       ISD::FP_TO_FP16};
300 
301   if (Subtarget.hasStdExtZfh())
302     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
303 
304   if (Subtarget.hasStdExtZfh()) {
305     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
306     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
307     for (auto CC : FPCCToExpand)
308       setCondCodeAction(CC, MVT::f16, Expand);
309     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
310     setOperationAction(ISD::SELECT, MVT::f16, Custom);
311     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
312     for (auto Op : FPOpToExpand)
313       setOperationAction(Op, MVT::f16, Expand);
314   }
315 
316   if (Subtarget.hasStdExtF()) {
317     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
318     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
319     for (auto CC : FPCCToExpand)
320       setCondCodeAction(CC, MVT::f32, Expand);
321     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
322     setOperationAction(ISD::SELECT, MVT::f32, Custom);
323     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
324     for (auto Op : FPOpToExpand)
325       setOperationAction(Op, MVT::f32, Expand);
326     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
327     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
328   }
329 
330   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
331     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
332 
333   if (Subtarget.hasStdExtD()) {
334     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
335     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
336     for (auto CC : FPCCToExpand)
337       setCondCodeAction(CC, MVT::f64, Expand);
338     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
339     setOperationAction(ISD::SELECT, MVT::f64, Custom);
340     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
341     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
342     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
343     for (auto Op : FPOpToExpand)
344       setOperationAction(Op, MVT::f64, Expand);
345     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
346     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
347   }
348 
349   if (Subtarget.is64Bit()) {
350     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
351     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
352     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
353     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
354   }
355 
356   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
357   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
358   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
359   setOperationAction(ISD::JumpTable, XLenVT, Custom);
360 
361   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
362 
363   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
364   // Unfortunately this can't be determined just from the ISA naming string.
365   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
366                      Subtarget.is64Bit() ? Legal : Custom);
367 
368   setOperationAction(ISD::TRAP, MVT::Other, Legal);
369   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
370   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
371 
372   if (Subtarget.hasStdExtA()) {
373     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
374     setMinCmpXchgSizeInBits(32);
375   } else {
376     setMaxAtomicSizeInBitsSupported(0);
377   }
378 
379   setBooleanContents(ZeroOrOneBooleanContent);
380 
381   if (Subtarget.hasStdExtV()) {
382     setBooleanVectorContents(ZeroOrOneBooleanContent);
383 
384     setOperationAction(ISD::VSCALE, XLenVT, Custom);
385 
386     // RVV intrinsics may have illegal operands.
387     // We also need to custom legalize vmv.x.s.
388     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
389     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
390     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
391     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
392     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
393     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
394 
395     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
396 
397     if (Subtarget.is64Bit()) {
398       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
399       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
400     } else {
401       // We must custom-lower certain vXi64 operations on RV32 due to the vector
402       // element type being illegal.
403       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
404       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
405       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
406 
407       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
408       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
409       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
410       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
411       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
412       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
413       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
414       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
415     }
416 
417     for (MVT VT : BoolVecVTs) {
418       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
419 
420       // Mask VTs are custom-expanded into a series of standard nodes
421       setOperationAction(ISD::TRUNCATE, VT, Custom);
422       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
423       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
424     }
425 
426     for (MVT VT : IntVecVTs) {
427       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
428 
429       setOperationAction(ISD::SMIN, VT, Legal);
430       setOperationAction(ISD::SMAX, VT, Legal);
431       setOperationAction(ISD::UMIN, VT, Legal);
432       setOperationAction(ISD::UMAX, VT, Legal);
433 
434       if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64)
435         setOperationAction(ISD::ABS, VT, Custom);
436 
437       setOperationAction(ISD::ROTL, VT, Expand);
438       setOperationAction(ISD::ROTR, VT, Expand);
439 
440       // Custom-lower extensions and truncations from/to mask types.
441       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
442       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
443       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
444 
445       // RVV has native int->float & float->int conversions where the
446       // element type sizes are within one power-of-two of each other. Any
447       // wider distances between type sizes have to be lowered as sequences
448       // which progressively narrow the gap in stages.
449       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
450       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
451       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
452       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
453 
454       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
455       // nodes which truncate by one power of two at a time.
456       setOperationAction(ISD::TRUNCATE, VT, Custom);
457 
458       // Custom-lower insert/extract operations to simplify patterns.
459       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
460       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
461 
462       // Custom-lower reduction operations to set up the corresponding custom
463       // nodes' operands.
464       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
465       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
466       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
467       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
468       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
469       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
470       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
471       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
472 
473       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
474       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
475 
476       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
477     }
478 
479     // Expand various CCs to best match the RVV ISA, which natively supports UNE
480     // but no other unordered comparisons, and supports all ordered comparisons
481     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
482     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
483     // and we pattern-match those back to the "original", swapping operands once
484     // more. This way we catch both operations and both "vf" and "fv" forms with
485     // fewer patterns.
486     ISD::CondCode VFPCCToExpand[] = {
487         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
488         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
489         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
490     };
491 
492     // Sets common operation actions on RVV floating-point vector types.
493     const auto SetCommonVFPActions = [&](MVT VT) {
494       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
495       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
496       // sizes are within one power-of-two of each other. Therefore conversions
497       // between vXf16 and vXf64 must be lowered as sequences which convert via
498       // vXf32.
499       setOperationAction(ISD::FP_ROUND, VT, Custom);
500       setOperationAction(ISD::FP_EXTEND, VT, Custom);
501       // Custom-lower insert/extract operations to simplify patterns.
502       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
503       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
504       // Expand various condition codes (explained above).
505       for (auto CC : VFPCCToExpand)
506         setCondCodeAction(CC, VT, Expand);
507 
508       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
509       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
510       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
511 
512       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
513       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
514 
515       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
516     };
517 
518     if (Subtarget.hasStdExtZfh())
519       for (MVT VT : F16VecVTs)
520         SetCommonVFPActions(VT);
521 
522     if (Subtarget.hasStdExtF())
523       for (MVT VT : F32VecVTs)
524         SetCommonVFPActions(VT);
525 
526     if (Subtarget.hasStdExtD())
527       for (MVT VT : F64VecVTs)
528         SetCommonVFPActions(VT);
529 
530     if (Subtarget.useRVVForFixedLengthVectors()) {
531       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
532         if (!useRVVForFixedLengthVectorVT(VT))
533           continue;
534 
535         // By default everything must be expanded.
536         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
537           setOperationAction(Op, VT, Expand);
538         for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
539           setTruncStoreAction(VT, OtherVT, Expand);
540 
541         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
542         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
543         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
544 
545         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
546         setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
547 
548         setOperationAction(ISD::LOAD, VT, Custom);
549         setOperationAction(ISD::STORE, VT, Custom);
550 
551         setOperationAction(ISD::SETCC, VT, Custom);
552 
553         setOperationAction(ISD::TRUNCATE, VT, Custom);
554 
        // Operations below differ between mask vectors and other vectors.
556         if (VT.getVectorElementType() == MVT::i1) {
557           setOperationAction(ISD::AND, VT, Custom);
558           setOperationAction(ISD::OR, VT, Custom);
559           setOperationAction(ISD::XOR, VT, Custom);
560           continue;
561         }
562 
563         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
564         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
565         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
566 
567         setOperationAction(ISD::ADD, VT, Custom);
568         setOperationAction(ISD::MUL, VT, Custom);
569         setOperationAction(ISD::SUB, VT, Custom);
570         setOperationAction(ISD::AND, VT, Custom);
571         setOperationAction(ISD::OR, VT, Custom);
572         setOperationAction(ISD::XOR, VT, Custom);
573         setOperationAction(ISD::SDIV, VT, Custom);
574         setOperationAction(ISD::SREM, VT, Custom);
575         setOperationAction(ISD::UDIV, VT, Custom);
576         setOperationAction(ISD::UREM, VT, Custom);
577         setOperationAction(ISD::SHL, VT, Custom);
578         setOperationAction(ISD::SRA, VT, Custom);
579         setOperationAction(ISD::SRL, VT, Custom);
580 
581         setOperationAction(ISD::SMIN, VT, Custom);
582         setOperationAction(ISD::SMAX, VT, Custom);
583         setOperationAction(ISD::UMIN, VT, Custom);
584         setOperationAction(ISD::UMAX, VT, Custom);
585         setOperationAction(ISD::ABS,  VT, Custom);
586 
587         setOperationAction(ISD::MULHS, VT, Custom);
588         setOperationAction(ISD::MULHU, VT, Custom);
589 
590         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
591         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
592         setOperationAction(ISD::FP_TO_SINT, VT, Custom);
593         setOperationAction(ISD::FP_TO_UINT, VT, Custom);
594 
595         setOperationAction(ISD::VSELECT, VT, Custom);
596 
597         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
598         setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
599         setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
600 
601         setOperationAction(ISD::BITCAST, VT, Custom);
602 
603         // Custom-lower reduction operations to set up the corresponding custom
604         // nodes' operands.
605         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
606         setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
607         setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
608         setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
609         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
610         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
611         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
612         setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
613       }
614 
615       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
616         if (!useRVVForFixedLengthVectorVT(VT))
617           continue;
618 
619         // By default everything must be expanded.
620         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
621           setOperationAction(Op, VT, Expand);
622         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
623           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
624           setTruncStoreAction(VT, OtherVT, Expand);
625         }
626 
627         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
628         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
629         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
630 
631         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
632         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
633         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
634         setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
635 
636         setOperationAction(ISD::LOAD, VT, Custom);
637         setOperationAction(ISD::STORE, VT, Custom);
638         setOperationAction(ISD::FADD, VT, Custom);
639         setOperationAction(ISD::FSUB, VT, Custom);
640         setOperationAction(ISD::FMUL, VT, Custom);
641         setOperationAction(ISD::FDIV, VT, Custom);
642         setOperationAction(ISD::FNEG, VT, Custom);
643         setOperationAction(ISD::FABS, VT, Custom);
644         setOperationAction(ISD::FSQRT, VT, Custom);
645         setOperationAction(ISD::FMA, VT, Custom);
646 
647         setOperationAction(ISD::FP_ROUND, VT, Custom);
648         setOperationAction(ISD::FP_EXTEND, VT, Custom);
649 
650         for (auto CC : VFPCCToExpand)
651           setCondCodeAction(CC, VT, Expand);
652 
653         setOperationAction(ISD::VSELECT, VT, Custom);
654 
655         setOperationAction(ISD::BITCAST, VT, Custom);
656 
657         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
658         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
659       }
660     }
661   }
662 
663   // Function alignments.
664   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
665   setMinFunctionAlignment(FunctionAlignment);
666   setPrefFunctionAlignment(FunctionAlignment);
667 
668   setMinimumJumpTableEntries(5);
669 
670   // Jumps are expensive, compared to logic
671   setJumpIsExpensive();
672 
673   // We can use any register for comparisons
674   setHasMultipleConditionRegisters();
675 
676   setTargetDAGCombine(ISD::SETCC);
677   if (Subtarget.hasStdExtZbp()) {
678     setTargetDAGCombine(ISD::OR);
679   }
680   if (Subtarget.hasStdExtV())
681     setTargetDAGCombine(ISD::FCOPYSIGN);
682 }
683 
684 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
685                                             LLVMContext &Context,
686                                             EVT VT) const {
687   if (!VT.isVector())
688     return getPointerTy(DL);
689   if (Subtarget.hasStdExtV() &&
690       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
691     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
692   return VT.changeVectorElementTypeToInteger();
693 }
694 
695 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
696                                              const CallInst &I,
697                                              MachineFunction &MF,
698                                              unsigned Intrinsic) const {
699   switch (Intrinsic) {
700   default:
701     return false;
702   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
703   case Intrinsic::riscv_masked_atomicrmw_add_i32:
704   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
705   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
706   case Intrinsic::riscv_masked_atomicrmw_max_i32:
707   case Intrinsic::riscv_masked_atomicrmw_min_i32:
708   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
709   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
710   case Intrinsic::riscv_masked_cmpxchg_i32:
711     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
712     Info.opc = ISD::INTRINSIC_W_CHAIN;
713     Info.memVT = MVT::getVT(PtrTy->getElementType());
714     Info.ptrVal = I.getArgOperand(0);
715     Info.offset = 0;
716     Info.align = Align(4);
717     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
718                  MachineMemOperand::MOVolatile;
719     return true;
720   }
721 }
722 
723 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
724                                                 const AddrMode &AM, Type *Ty,
725                                                 unsigned AS,
726                                                 Instruction *I) const {
727   // No global is ever allowed as a base.
728   if (AM.BaseGV)
729     return false;
730 
731   // Require a 12-bit signed offset.
732   if (!isInt<12>(AM.BaseOffs))
733     return false;
734 
735   switch (AM.Scale) {
736   case 0: // "r+i" or just "i", depending on HasBaseReg.
737     break;
738   case 1:
739     if (!AM.HasBaseReg) // allow "r+i".
740       break;
741     return false; // disallow "r+r" or "r+r+i".
742   default:
743     return false;
744   }
745 
746   return true;
747 }
748 
749 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
750   return isInt<12>(Imm);
751 }
752 
753 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
754   return isInt<12>(Imm);
755 }
756 
757 // On RV32, 64-bit integers are split into their high and low parts and held
758 // in two different registers, so the trunc is free since the low register can
759 // just be used.
760 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
761   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
762     return false;
763   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
764   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
765   return (SrcBits == 64 && DestBits == 32);
766 }
767 
768 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
769   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
770       !SrcVT.isInteger() || !DstVT.isInteger())
771     return false;
772   unsigned SrcBits = SrcVT.getSizeInBits();
773   unsigned DestBits = DstVT.getSizeInBits();
774   return (SrcBits == 64 && DestBits == 32);
775 }
776 
777 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
778   // Zexts are free if they can be combined with a load.
779   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
780     EVT MemVT = LD->getMemoryVT();
781     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
782          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
783         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
784          LD->getExtensionType() == ISD::ZEXTLOAD))
785       return true;
786   }
787 
788   return TargetLowering::isZExtFree(Val, VT2);
789 }
790 
791 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
792   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
793 }
794 
795 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
796   return Subtarget.hasStdExtZbb();
797 }
798 
799 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
800   return Subtarget.hasStdExtZbb();
801 }
802 
803 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
804                                        bool ForCodeSize) const {
805   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
806     return false;
807   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
808     return false;
809   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
810     return false;
811   if (Imm.isNegZero())
812     return false;
813   return Imm.isZero();
814 }
815 
816 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
817   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
818          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
819          (VT == MVT::f64 && Subtarget.hasStdExtD());
820 }
821 
822 // Changes the condition code and swaps operands if necessary, so the SetCC
823 // operation matches one of the comparisons supported directly in the RISC-V
824 // ISA.
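// For example, (setgt lhs, rhs) is rewritten as (setlt rhs, lhs).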
825 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
826   switch (CC) {
827   default:
828     break;
829   case ISD::SETGT:
830   case ISD::SETLE:
831   case ISD::SETUGT:
832   case ISD::SETULE:
833     CC = ISD::getSetCCSwappedOperands(CC);
834     std::swap(LHS, RHS);
835     break;
836   }
837 }
838 
839 // Return the RISC-V branch opcode that matches the given DAG integer
840 // condition code. The CondCode must be one of those supported by the RISC-V
841 // ISA (see normaliseSetCC).
842 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
843   switch (CC) {
844   default:
845     llvm_unreachable("Unsupported CondCode");
846   case ISD::SETEQ:
847     return RISCV::BEQ;
848   case ISD::SETNE:
849     return RISCV::BNE;
850   case ISD::SETLT:
851     return RISCV::BLT;
852   case ISD::SETGE:
853     return RISCV::BGE;
854   case ISD::SETULT:
855     return RISCV::BLTU;
856   case ISD::SETUGE:
857     return RISCV::BGEU;
858   }
859 }
860 
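// Return the LMUL (vector register group multiplier) for a scalable vector
// type, derived from its known minimum size in bits. Mask (i1) vectors are
// scaled up as if their elements were 8 bits wide.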
861 RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
862   assert(VT.isScalableVector() && "Expecting a scalable vector type");
863   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
864   if (VT.getVectorElementType() == MVT::i1)
865     KnownSize *= 8;
866 
867   switch (KnownSize) {
868   default:
869     llvm_unreachable("Invalid LMUL.");
870   case 8:
871     return RISCVVLMUL::LMUL_F8;
872   case 16:
873     return RISCVVLMUL::LMUL_F4;
874   case 32:
875     return RISCVVLMUL::LMUL_F2;
876   case 64:
877     return RISCVVLMUL::LMUL_1;
878   case 128:
879     return RISCVVLMUL::LMUL_2;
880   case 256:
881     return RISCVVLMUL::LMUL_4;
882   case 512:
883     return RISCVVLMUL::LMUL_8;
884   }
885 }
886 
887 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
888   switch (LMul) {
889   default:
890     llvm_unreachable("Invalid LMUL.");
891   case RISCVVLMUL::LMUL_F8:
892   case RISCVVLMUL::LMUL_F4:
893   case RISCVVLMUL::LMUL_F2:
894   case RISCVVLMUL::LMUL_1:
895     return RISCV::VRRegClassID;
896   case RISCVVLMUL::LMUL_2:
897     return RISCV::VRM2RegClassID;
898   case RISCVVLMUL::LMUL_4:
899     return RISCV::VRM4RegClassID;
900   case RISCVVLMUL::LMUL_8:
901     return RISCV::VRM8RegClassID;
902   }
903 }
904 
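// Return the subregister index that selects the Index'th VT-sized piece of a
// larger vector register group.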
905 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
906   RISCVVLMUL LMUL = getLMUL(VT);
907   if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
908       LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
909     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
910                   "Unexpected subreg numbering");
911     return RISCV::sub_vrm1_0 + Index;
912   }
913   if (LMUL == RISCVVLMUL::LMUL_2) {
914     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
915                   "Unexpected subreg numbering");
916     return RISCV::sub_vrm2_0 + Index;
917   }
918   if (LMUL == RISCVVLMUL::LMUL_4) {
919     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
920                   "Unexpected subreg numbering");
921     return RISCV::sub_vrm4_0 + Index;
922   }
923   llvm_unreachable("Invalid vector type.");
924 }
925 
926 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
927   if (VT.getVectorElementType() == MVT::i1)
928     return RISCV::VRRegClassID;
929   return getRegClassIDForLMUL(getLMUL(VT));
930 }
931 
932 // Attempt to decompose a subvector insert/extract between VecVT and
933 // SubVecVT via subregister indices. Returns the subregister index that
934 // can perform the subvector insert/extract with the given element index, as
935 // well as the index corresponding to any leftover subvectors that must be
936 // further inserted/extracted within the register class for SubVecVT.
937 std::pair<unsigned, unsigned>
938 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
939     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
940     const RISCVRegisterInfo *TRI) {
941   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
942                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
943                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
944                 "Register classes not ordered");
945   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
946   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
947   // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
950   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
951   // Note that this is not guaranteed to find a subregister index, such as
952   // when we are extracting from one VR type to another.
953   unsigned SubRegIdx = RISCV::NoSubRegister;
954   for (const unsigned RCID :
955        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
956     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
957       VecVT = VecVT.getHalfNumVectorElementsVT();
958       bool IsHi =
959           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
960       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
961                                             getSubregIndexByMVT(VecVT, IsHi));
962       if (IsHi)
963         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
964     }
965   return {SubRegIdx, InsertExtractIdx};
966 }
967 
// Return the RVV container (scalable vector) type that a legal fixed-length
// vector type VT is lowered into.
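// The container holds LMul * (RVVBitsPerBlock / EltSize) elements; for
// example, an i32 vector assigned LMUL=1 is given the nxv2i32 container.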
969 MVT RISCVTargetLowering::getContainerForFixedLengthVector(
970     const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
971   assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
972          "Expected legal fixed length vector!");
973 
974   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
975   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
976 
977   MVT EltVT = VT.getVectorElementType();
978   switch (EltVT.SimpleTy) {
979   default:
980     llvm_unreachable("unexpected element type for RVV container");
981   case MVT::i1: {
982     // Masks are calculated assuming 8-bit elements since that's when we need
983     // the most elements.
984     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
985     return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
986   }
987   case MVT::i8:
988   case MVT::i16:
989   case MVT::i32:
990   case MVT::i64:
991   case MVT::f16:
992   case MVT::f32:
993   case MVT::f64: {
994     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
995     return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
996   }
997   }
998 }
999 
1000 MVT RISCVTargetLowering::getContainerForFixedLengthVector(
1001     SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
1002   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1003                                           Subtarget);
1004 }
1005 
1006 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1007   return getContainerForFixedLengthVector(*this, VT, getSubtarget());
1008 }
1009 
1010 // Grow V to consume an entire RVV register.
1011 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1012                                        const RISCVSubtarget &Subtarget) {
1013   assert(VT.isScalableVector() &&
1014          "Expected to convert into a scalable vector!");
1015   assert(V.getValueType().isFixedLengthVector() &&
1016          "Expected a fixed length vector operand!");
1017   SDLoc DL(V);
1018   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1019   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1020 }
1021 
1022 // Shrink V so it's just big enough to maintain a VT's worth of data.
1023 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1024                                          const RISCVSubtarget &Subtarget) {
1025   assert(VT.isFixedLengthVector() &&
1026          "Expected to convert into a fixed length vector!");
1027   assert(V.getValueType().isScalableVector() &&
1028          "Expected a scalable vector operand!");
1029   SDLoc DL(V);
1030   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1031   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1032 }
1033 
1034 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1035 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1036 // the vector type that it is contained in.
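// Fixed-length vectors pass their exact element count as the VL; scalable
// vectors pass X0, which is used to select VLMAX.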
1037 static std::pair<SDValue, SDValue>
1038 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1039                 const RISCVSubtarget &Subtarget) {
1040   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1041   MVT XLenVT = Subtarget.getXLenVT();
1042   SDValue VL = VecVT.isFixedLengthVector()
1043                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1044                    : DAG.getRegister(RISCV::X0, XLenVT);
1045   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1046   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1047   return {Mask, VL};
1048 }
1049 
1050 // As above but assuming the given type is a scalable vector type.
1051 static std::pair<SDValue, SDValue>
1052 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1053                         const RISCVSubtarget &Subtarget) {
1054   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1055   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1056 }
1057 
1058 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1059 // of either is (currently) supported. This can get us into an infinite loop
1060 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1061 // as a ..., etc.
1062 // Until either (or both) of these can reliably lower any node, reporting that
1063 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1064 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1065 // which is not desirable.
1066 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1067     EVT VT, unsigned DefinedValues) const {
1068   return false;
1069 }
1070 
1071 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1072                                  const RISCVSubtarget &Subtarget) {
1073   MVT VT = Op.getSimpleValueType();
1074   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1075 
1076   MVT ContainerVT =
1077       RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
1078 
1079   SDLoc DL(Op);
1080   SDValue Mask, VL;
1081   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1082 
1083   if (VT.getVectorElementType() == MVT::i1) {
1084     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1085       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1086       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1087     }
1088 
1089     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1090       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1091       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1092     }
1093 
1094     return SDValue();
1095   }
1096 
1097   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1098     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1099                                         : RISCVISD::VMV_V_X_VL;
1100     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1101     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1102   }
1103 
  // Try to match an index sequence, which we can lower directly to the vid
1105   // instruction. An all-undef vector is matched by getSplatValue, above.
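  // For example, <0, 1, 2, 3> and <0, undef, 2, undef> both match.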
1106   if (VT.isInteger()) {
1107     bool IsVID = true;
1108     for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
1109       IsVID &= Op.getOperand(i).isUndef() ||
1110                (isa<ConstantSDNode>(Op.getOperand(i)) &&
1111                 Op.getConstantOperandVal(i) == i);
1112 
1113     if (IsVID) {
1114       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1115       return convertFromScalableVector(VT, VID, DAG, Subtarget);
1116     }
1117   }
1118 
1119   return SDValue();
1120 }
1121 
1122 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
1123                                    const RISCVSubtarget &Subtarget) {
1124   SDValue V1 = Op.getOperand(0);
1125   SDLoc DL(Op);
1126   MVT VT = Op.getSimpleValueType();
1127   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1128 
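  // A shuffle that splats a single lane is lowered to vrgather.vx with the
  // lane index as the scalar operand.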
1129   if (SVN->isSplat()) {
1130     int Lane = SVN->getSplatIndex();
1131     if (Lane >= 0) {
1132       MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
1133           DAG, VT, Subtarget);
1134 
1135       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
1136       assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");
1137 
1138       SDValue Mask, VL;
1139       std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1140       MVT XLenVT = Subtarget.getXLenVT();
1141       SDValue Gather =
1142           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
1143                       DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
1144       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1145     }
1146   }
1147 
1148   return SDValue();
1149 }
1150 
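// Emit an FP extend or round from Op to VT. Scalable types can use the
// generic ISD nodes directly; fixed-length types are lowered to the
// VL-predicated FP_EXTEND_VL/FP_ROUND_VL nodes on their container type.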
1151 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
1152                                      SDLoc DL, SelectionDAG &DAG,
1153                                      const RISCVSubtarget &Subtarget) {
1154   if (VT.isScalableVector())
1155     return DAG.getFPExtendOrRound(Op, DL, VT);
1156   assert(VT.isFixedLengthVector() &&
1157          "Unexpected value type for RVV FP extend/round lowering");
1158   SDValue Mask, VL;
1159   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1160   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
1161                         ? RISCVISD::FP_EXTEND_VL
1162                         : RISCVISD::FP_ROUND_VL;
1163   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1164 }
1165 
1166 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1167                                             SelectionDAG &DAG) const {
1168   switch (Op.getOpcode()) {
1169   default:
1170     report_fatal_error("unimplemented operand");
1171   case ISD::GlobalAddress:
1172     return lowerGlobalAddress(Op, DAG);
1173   case ISD::BlockAddress:
1174     return lowerBlockAddress(Op, DAG);
1175   case ISD::ConstantPool:
1176     return lowerConstantPool(Op, DAG);
1177   case ISD::JumpTable:
1178     return lowerJumpTable(Op, DAG);
1179   case ISD::GlobalTLSAddress:
1180     return lowerGlobalTLSAddress(Op, DAG);
1181   case ISD::SELECT:
1182     return lowerSELECT(Op, DAG);
1183   case ISD::VASTART:
1184     return lowerVASTART(Op, DAG);
1185   case ISD::FRAMEADDR:
1186     return lowerFRAMEADDR(Op, DAG);
1187   case ISD::RETURNADDR:
1188     return lowerRETURNADDR(Op, DAG);
1189   case ISD::SHL_PARTS:
1190     return lowerShiftLeftParts(Op, DAG);
1191   case ISD::SRA_PARTS:
1192     return lowerShiftRightParts(Op, DAG, true);
1193   case ISD::SRL_PARTS:
1194     return lowerShiftRightParts(Op, DAG, false);
1195   case ISD::BITCAST: {
1196     SDValue Op0 = Op.getOperand(0);
1197     // We can handle fixed length vector bitcasts with a simple replacement
1198     // in isel.
1199     if (Op.getValueType().isFixedLengthVector()) {
1200       if (Op0.getValueType().isFixedLengthVector())
1201         return Op;
1202       return SDValue();
1203     }
1204     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
1205             Subtarget.hasStdExtZfh()) &&
1206            "Unexpected custom legalisation");
1207     SDLoc DL(Op);
1208     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
1209       if (Op0.getValueType() != MVT::i16)
1210         return SDValue();
1211       SDValue NewOp0 =
1212           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
1213       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1214       return FPConv;
1215     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
1216                Subtarget.hasStdExtF()) {
1217       if (Op0.getValueType() != MVT::i32)
1218         return SDValue();
1219       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1220       SDValue FPConv =
1221           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1222       return FPConv;
1223     }
1224     return SDValue();
1225   }
1226   case ISD::INTRINSIC_WO_CHAIN:
1227     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1228   case ISD::INTRINSIC_W_CHAIN:
1229     return LowerINTRINSIC_W_CHAIN(Op, DAG);
1230   case ISD::BSWAP:
1231   case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
1233     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1234     MVT VT = Op.getSimpleValueType();
1235     SDLoc DL(Op);
1236     // Start with the maximum immediate value which is the bitwidth - 1.
1237     unsigned Imm = VT.getSizeInBits() - 1;
1238     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1239     if (Op.getOpcode() == ISD::BSWAP)
1240       Imm &= ~0x7U;
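    // For example, with XLen=64, BITREVERSE uses GREVI with imm 63 while
    // BSWAP uses imm 56 (63 & ~0x7).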
1241     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
1242                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
1243   }
1244   case ISD::FSHL:
1245   case ISD::FSHR: {
1246     MVT VT = Op.getSimpleValueType();
1247     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1248     SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
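    // For example, on RV64 the amount is ANDed with 63 so that only the low
    // log2(64) = 6 bits reach FSL/FSR.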
1251     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1252     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1253                                 DAG.getConstant(ShAmtWidth, DL, VT));
1254     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1255     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1256   }
1257   case ISD::TRUNCATE: {
1258     SDLoc DL(Op);
1259     MVT VT = Op.getSimpleValueType();
1260     // Only custom-lower vector truncates
1261     if (!VT.isVector())
1262       return Op;
1263 
1264     // Truncates to mask types are handled differently
1265     if (VT.getVectorElementType() == MVT::i1)
1266       return lowerVectorMaskTrunc(Op, DAG);
1267 
1268     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1269     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
1270     // truncate by one power of two at a time.
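    // For example, an i64->i8 element truncate is emitted as
    // i64->i32->i16->i8, one TRUNCATE_VECTOR_VL node per step.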
1271     MVT DstEltVT = VT.getVectorElementType();
1272 
1273     SDValue Src = Op.getOperand(0);
1274     MVT SrcVT = Src.getSimpleValueType();
1275     MVT SrcEltVT = SrcVT.getVectorElementType();
1276 
1277     assert(DstEltVT.bitsLT(SrcEltVT) &&
1278            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1279            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1280            "Unexpected vector truncate lowering");
1281 
1282     MVT ContainerVT = SrcVT;
1283     if (SrcVT.isFixedLengthVector()) {
1284       ContainerVT = getContainerForFixedLengthVector(SrcVT);
1285       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1286     }
1287 
1288     SDValue Result = Src;
1289     SDValue Mask, VL;
1290     std::tie(Mask, VL) =
1291         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
1292     LLVMContext &Context = *DAG.getContext();
1293     const ElementCount Count = ContainerVT.getVectorElementCount();
1294     do {
1295       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
1296       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1297       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
1298                            Mask, VL);
1299     } while (SrcEltVT != DstEltVT);
1300 
1301     if (SrcVT.isFixedLengthVector())
1302       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
1303 
1304     return Result;
1305   }
1306   case ISD::ANY_EXTEND:
1307   case ISD::ZERO_EXTEND:
1308     if (Op.getOperand(0).getValueType().isVector() &&
1309         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1310       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1311     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
1312   case ISD::SIGN_EXTEND:
1313     if (Op.getOperand(0).getValueType().isVector() &&
1314         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1315       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1316     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
1317   case ISD::SPLAT_VECTOR:
1318     return lowerSPLATVECTOR(Op, DAG);
1319   case ISD::INSERT_VECTOR_ELT:
1320     return lowerINSERT_VECTOR_ELT(Op, DAG);
1321   case ISD::EXTRACT_VECTOR_ELT:
1322     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1323   case ISD::VSCALE: {
1324     MVT VT = Op.getSimpleValueType();
1325     SDLoc DL(Op);
1326     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1327     // We define our scalable vector types for lmul=1 to use a 64-bit known
1328     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we compute
1329     // vscale as VLENB / 8 (a right shift by 3).
1330     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1331                                  DAG.getConstant(3, DL, VT));
1332     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1333   }
1334   case ISD::FP_EXTEND: {
1335     // RVV can only do fp_extend to types twice the size of the source. We
1336     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1337     // via f32.
1338     SDLoc DL(Op);
1339     MVT VT = Op.getSimpleValueType();
1340     SDValue Src = Op.getOperand(0);
1341     MVT SrcVT = Src.getSimpleValueType();
1342 
1343     // Prepare any fixed-length vector operands.
1344     MVT ContainerVT = VT;
1345     if (SrcVT.isFixedLengthVector()) {
1346       ContainerVT = getContainerForFixedLengthVector(VT);
1347       MVT SrcContainerVT =
1348           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
1349       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1350     }
1351 
1352     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1353         SrcVT.getVectorElementType() != MVT::f16) {
1354       // For scalable vectors, we only need to close the gap between
1355       // vXf16->vXf64.
1356       if (!VT.isFixedLengthVector())
1357         return Op;
1358       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
1359       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1360       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1361     }
1362 
1363     MVT InterVT = VT.changeVectorElementType(MVT::f32);
1364     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
1365     SDValue IntermediateExtend = getRVVFPExtendOrRound(
1366         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
1367 
1368     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
1369                                            DL, DAG, Subtarget);
1370     if (VT.isFixedLengthVector())
1371       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
1372     return Extend;
1373   }
1374   case ISD::FP_ROUND: {
1375     // RVV can only do fp_round to types half the size of the source. We
1376     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1377     // conversion instruction.
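    // That is, f64 is first narrowed to f32 with vfncvt.rod.f.f.w (round
    // towards odd), then f32 is narrowed to f16 with an ordinary fp_round.
    // Rounding to odd in the first step avoids double-rounding errors.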
1378     SDLoc DL(Op);
1379     MVT VT = Op.getSimpleValueType();
1380     SDValue Src = Op.getOperand(0);
1381     MVT SrcVT = Src.getSimpleValueType();
1382 
1383     // Prepare any fixed-length vector operands.
1384     MVT ContainerVT = VT;
1385     if (VT.isFixedLengthVector()) {
1386       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1387       ContainerVT =
1388           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1389       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1390     }
1391 
1392     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1393         SrcVT.getVectorElementType() != MVT::f64) {
1394       // For scalable vectors, we only need to close the gap between
1395       // vXf64<->vXf16.
1396       if (!VT.isFixedLengthVector())
1397         return Op;
1398       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
1399       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1400       return convertFromScalableVector(VT, Src, DAG, Subtarget);
1401     }
1402 
1403     SDValue Mask, VL;
1404     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1405 
1406     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
1407     SDValue IntermediateRound =
1408         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
1409     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
1410                                           DL, DAG, Subtarget);
1411 
1412     if (VT.isFixedLengthVector())
1413       return convertFromScalableVector(VT, Round, DAG, Subtarget);
1414     return Round;
1415   }
1416   case ISD::FP_TO_SINT:
1417   case ISD::FP_TO_UINT:
1418   case ISD::SINT_TO_FP:
1419   case ISD::UINT_TO_FP: {
1420     // RVV can only do fp<->int conversions to types half or twice the size of
1421     // the source. We custom-lower any conversion that would otherwise require
1422     // two hops into an explicit two-step sequence.
1423     MVT VT = Op.getSimpleValueType();
1424     if (!VT.isVector())
1425       return Op;
1426     SDLoc DL(Op);
1427     SDValue Src = Op.getOperand(0);
1428     MVT EltVT = VT.getVectorElementType();
1429     MVT SrcVT = Src.getSimpleValueType();
1430     MVT SrcEltVT = SrcVT.getVectorElementType();
1431     unsigned EltSize = EltVT.getSizeInBits();
1432     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1433     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1434            "Unexpected vector element types");
1435 
1436     bool IsInt2FP = SrcEltVT.isInteger();
1437     // Widening conversions
1438     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1439       if (IsInt2FP) {
1440         // Do a regular integer sign/zero extension then convert to float.
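        // For example, vXi8->vXf32 becomes a sign/zero extend to vXi32
        // followed by a single i32->f32 conversion.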
1441         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1442                                       VT.getVectorElementCount());
1443         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1444                                  ? ISD::ZERO_EXTEND
1445                                  : ISD::SIGN_EXTEND;
1446         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1447         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1448       }
1449       // FP2Int
1450       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1451       // Do one doubling fp_extend then complete the operation by converting
1452       // to int.
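      // For example, vXf16->vXi64 becomes an f16->f32 extend followed by an
      // f32->i64 conversion.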
1453       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1454       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1455       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1456     }
1457 
1458     // Narrowing conversions
1459     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1460       if (IsInt2FP) {
1461         // One narrowing int_to_fp, then an fp_round.
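        // For example, vXi64->vXf16 becomes an i64->f32 conversion followed
        // by an f32->f16 round.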
1462         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1463         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1464         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1465         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1466       }
1467       // FP2Int
1468       // One narrowing fp_to_int, then truncate the integer. If the float isn't
1469       // representable by the integer, the result is poison.
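      // For example, vXf64->vXi8 becomes an f64->i32 conversion followed by
      // an i32->i8 truncate.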
1470       MVT IVecVT =
1471           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1472                            VT.getVectorElementCount());
1473       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1474       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1475     }
1476 
1477     // Scalable vectors can exit here; patterns will handle both equally-sized
1478     // conversions and the remaining halving/doubling ones.
1479     if (!VT.isFixedLengthVector())
1480       return Op;
1481 
1482     // For fixed-length vectors we lower to a custom "VL" node.
1483     unsigned RVVOpc = 0;
1484     switch (Op.getOpcode()) {
1485     default:
1486       llvm_unreachable("Impossible opcode");
1487     case ISD::FP_TO_SINT:
1488       RVVOpc = RISCVISD::FP_TO_SINT_VL;
1489       break;
1490     case ISD::FP_TO_UINT:
1491       RVVOpc = RISCVISD::FP_TO_UINT_VL;
1492       break;
1493     case ISD::SINT_TO_FP:
1494       RVVOpc = RISCVISD::SINT_TO_FP_VL;
1495       break;
1496     case ISD::UINT_TO_FP:
1497       RVVOpc = RISCVISD::UINT_TO_FP_VL;
1498       break;
1499     }
1500 
1501     MVT ContainerVT, SrcContainerVT;
1502     // Derive the reference container type from the larger vector type.
1503     if (SrcEltSize > EltSize) {
1504       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1505       ContainerVT =
1506           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1507     } else {
1508       ContainerVT = getContainerForFixedLengthVector(VT);
1509       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
1510     }
1511 
1512     SDValue Mask, VL;
1513     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1514 
1515     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1516     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
1517     return convertFromScalableVector(VT, Src, DAG, Subtarget);
1518   }
1519   case ISD::VECREDUCE_ADD:
1520   case ISD::VECREDUCE_UMAX:
1521   case ISD::VECREDUCE_SMAX:
1522   case ISD::VECREDUCE_UMIN:
1523   case ISD::VECREDUCE_SMIN:
1524   case ISD::VECREDUCE_AND:
1525   case ISD::VECREDUCE_OR:
1526   case ISD::VECREDUCE_XOR:
1527     return lowerVECREDUCE(Op, DAG);
1528   case ISD::VECREDUCE_FADD:
1529   case ISD::VECREDUCE_SEQ_FADD:
1530     return lowerFPVECREDUCE(Op, DAG);
1531   case ISD::INSERT_SUBVECTOR:
1532     return lowerINSERT_SUBVECTOR(Op, DAG);
1533   case ISD::EXTRACT_SUBVECTOR:
1534     return lowerEXTRACT_SUBVECTOR(Op, DAG);
1535   case ISD::VECTOR_REVERSE:
1536     return lowerVECTOR_REVERSE(Op, DAG);
1537   case ISD::BUILD_VECTOR:
1538     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1539   case ISD::VECTOR_SHUFFLE:
1540     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
1541   case ISD::CONCAT_VECTORS: {
1542     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
1543     // better than going through the stack, as the default expansion does.
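    // For example, (concat_vectors a, b) becomes
    //   (insert_subvector (insert_subvector undef, a, 0), b, NumOpElts).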
1544     SDLoc DL(Op);
1545     MVT VT = Op.getSimpleValueType();
1546     assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering");
1547     unsigned NumOpElts =
1548         Op.getOperand(0).getSimpleValueType().getVectorNumElements();
1549     SDValue Vec = DAG.getUNDEF(VT);
1550     for (const auto &OpIdx : enumerate(Op->ops()))
1551       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
1552                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
1553     return Vec;
1554   }
1555   case ISD::LOAD:
1556     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1557   case ISD::STORE:
1558     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1559   case ISD::SETCC:
1560     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
1561   case ISD::ADD:
1562     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1563   case ISD::SUB:
1564     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1565   case ISD::MUL:
1566     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1567   case ISD::MULHS:
1568     return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
1569   case ISD::MULHU:
1570     return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
1571   case ISD::AND:
1572     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
1573                                               RISCVISD::AND_VL);
1574   case ISD::OR:
1575     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
1576                                               RISCVISD::OR_VL);
1577   case ISD::XOR:
1578     return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
1579                                               RISCVISD::XOR_VL);
1580   case ISD::SDIV:
1581     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1582   case ISD::SREM:
1583     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1584   case ISD::UDIV:
1585     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1586   case ISD::UREM:
1587     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1588   case ISD::SHL:
1589     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1590   case ISD::SRA:
1591     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1592   case ISD::SRL:
1593     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1594   case ISD::FADD:
1595     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1596   case ISD::FSUB:
1597     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1598   case ISD::FMUL:
1599     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1600   case ISD::FDIV:
1601     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1602   case ISD::FNEG:
1603     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1604   case ISD::FABS:
1605     return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
1606   case ISD::FSQRT:
1607     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
1608   case ISD::FMA:
1609     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1610   case ISD::SMIN:
1611     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
1612   case ISD::SMAX:
1613     return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
1614   case ISD::UMIN:
1615     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
1616   case ISD::UMAX:
1617     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
1618   case ISD::ABS:
1619     return lowerABS(Op, DAG);
1620   case ISD::VSELECT:
1621     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
1622   }
1623 }
1624 
1625 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1626                              SelectionDAG &DAG, unsigned Flags) {
1627   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1628 }
1629 
1630 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1631                              SelectionDAG &DAG, unsigned Flags) {
1632   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1633                                    Flags);
1634 }
1635 
1636 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1637                              SelectionDAG &DAG, unsigned Flags) {
1638   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1639                                    N->getOffset(), Flags);
1640 }
1641 
1642 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1643                              SelectionDAG &DAG, unsigned Flags) {
1644   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1645 }
1646 
1647 template <class NodeTy>
1648 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1649                                      bool IsLocal) const {
1650   SDLoc DL(N);
1651   EVT Ty = getPointerTy(DAG.getDataLayout());
1652 
1653   if (isPositionIndependent()) {
1654     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1655     if (IsLocal)
1656       // Use PC-relative addressing to access the symbol. This generates the
1657       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
1658       // %pcrel_lo(auipc)).
1659       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1660 
1661     // Use PC-relative addressing to access the GOT for this symbol, then load
1662     // the address from the GOT. This generates the pattern (PseudoLA sym),
1663     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
1664     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
1665   }
1666 
1667   switch (getTargetMachine().getCodeModel()) {
1668   default:
1669     report_fatal_error("Unsupported code model for lowering");
1670   case CodeModel::Small: {
1671     // Generate a sequence for accessing addresses within the first 2 GiB of
1672     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
1673     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
1674     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
1675     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1676     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
1677   }
1678   case CodeModel::Medium: {
1679     // Generate a sequence for accessing addresses within any 2 GiB range of
1680     // the address space. This generates the pattern (PseudoLLA sym), which
1681     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
1682     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1683     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1684   }
1685   }
1686 }
1687 
1688 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1689                                                 SelectionDAG &DAG) const {
1690   SDLoc DL(Op);
1691   EVT Ty = Op.getValueType();
1692   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1693   int64_t Offset = N->getOffset();
1694   MVT XLenVT = Subtarget.getXLenVT();
1695 
1696   const GlobalValue *GV = N->getGlobal();
1697   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1698   SDValue Addr = getAddr(N, DAG, IsLocal);
1699 
1700   // In order to maximise the opportunity for common subexpression elimination,
1701   // emit a separate ADD node for the global address offset instead of folding
1702   // it into the global address node. Later peephole optimisations may choose to
1703   // fold it back in when profitable.
1704   if (Offset != 0)
1705     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1706                        DAG.getConstant(Offset, DL, XLenVT));
1707   return Addr;
1708 }
1709 
1710 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1711                                                SelectionDAG &DAG) const {
1712   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1713 
1714   return getAddr(N, DAG);
1715 }
1716 
1717 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1718                                                SelectionDAG &DAG) const {
1719   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1720 
1721   return getAddr(N, DAG);
1722 }
1723 
1724 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1725                                             SelectionDAG &DAG) const {
1726   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1727 
1728   return getAddr(N, DAG);
1729 }
1730 
1731 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1732                                               SelectionDAG &DAG,
1733                                               bool UseGOT) const {
1734   SDLoc DL(N);
1735   EVT Ty = getPointerTy(DAG.getDataLayout());
1736   const GlobalValue *GV = N->getGlobal();
1737   MVT XLenVT = Subtarget.getXLenVT();
1738 
1739   if (UseGOT) {
1740     // Use PC-relative addressing to access the GOT for this TLS symbol, then
1741     // load the address from the GOT and add the thread pointer. This generates
1742     // the pattern (PseudoLA_TLS_IE sym), which expands to
1743     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
1744     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1745     SDValue Load =
1746         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
1747 
1748     // Add the thread pointer.
1749     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1750     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
1751   }
1752 
1753   // Generate a sequence for accessing the address relative to the thread
1754   // pointer, with the appropriate adjustment for the thread pointer offset.
1755   // This generates the pattern
1756   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
1757   SDValue AddrHi =
1758       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
1759   SDValue AddrAdd =
1760       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
1761   SDValue AddrLo =
1762       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
1763 
1764   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1765   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1766   SDValue MNAdd = SDValue(
1767       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
1768       0);
1769   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
1770 }
1771 
1772 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1773                                                SelectionDAG &DAG) const {
1774   SDLoc DL(N);
1775   EVT Ty = getPointerTy(DAG.getDataLayout());
1776   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1777   const GlobalValue *GV = N->getGlobal();
1778 
1779   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1780   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
1781   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
1782   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1783   SDValue Load =
1784       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
1785 
1786   // Prepare argument list to generate call.
1787   ArgListTy Args;
1788   ArgListEntry Entry;
1789   Entry.Node = Load;
1790   Entry.Ty = CallTy;
1791   Args.push_back(Entry);
1792 
1793   // Setup call to __tls_get_addr.
1794   TargetLowering::CallLoweringInfo CLI(DAG);
1795   CLI.setDebugLoc(DL)
1796       .setChain(DAG.getEntryNode())
1797       .setLibCallee(CallingConv::C, CallTy,
1798                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1799                     std::move(Args));
1800 
1801   return LowerCallTo(CLI).first;
1802 }
1803 
1804 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1805                                                    SelectionDAG &DAG) const {
1806   SDLoc DL(Op);
1807   EVT Ty = Op.getValueType();
1808   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1809   int64_t Offset = N->getOffset();
1810   MVT XLenVT = Subtarget.getXLenVT();
1811 
1812   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1813 
1814   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1815       CallingConv::GHC)
1816     report_fatal_error("In GHC calling convention TLS is not supported");
1817 
1818   SDValue Addr;
1819   switch (Model) {
1820   case TLSModel::LocalExec:
1821     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1822     break;
1823   case TLSModel::InitialExec:
1824     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1825     break;
1826   case TLSModel::LocalDynamic:
1827   case TLSModel::GeneralDynamic:
1828     Addr = getDynamicTLSAddr(N, DAG);
1829     break;
1830   }
1831 
1832   // In order to maximise the opportunity for common subexpression elimination,
1833   // emit a separate ADD node for the global address offset instead of folding
1834   // it into the global address node. Later peephole optimisations may choose to
1835   // fold it back in when profitable.
1836   if (Offset != 0)
1837     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1838                        DAG.getConstant(Offset, DL, XLenVT));
1839   return Addr;
1840 }
1841 
1842 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1843   SDValue CondV = Op.getOperand(0);
1844   SDValue TrueV = Op.getOperand(1);
1845   SDValue FalseV = Op.getOperand(2);
1846   SDLoc DL(Op);
1847   MVT XLenVT = Subtarget.getXLenVT();
1848 
1849   // If the result type is XLenVT and CondV is the output of a SETCC node
1850   // which also operated on XLenVT inputs, then merge the SETCC node into the
1851   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1852   // compare+branch instructions. i.e.:
1853   // (select (setcc lhs, rhs, cc), truev, falsev)
1854   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1855   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1856       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1857     SDValue LHS = CondV.getOperand(0);
1858     SDValue RHS = CondV.getOperand(1);
1859     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1860     ISD::CondCode CCVal = CC->get();
1861 
1862     normaliseSetCC(LHS, RHS, CCVal);
1863 
1864     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1865     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1866     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1867   }
1868 
1869   // Otherwise:
1870   // (select condv, truev, falsev)
1871   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1872   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1873   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1874 
1875   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1876 
1877   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1878 }
1879 
1880 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1881   MachineFunction &MF = DAG.getMachineFunction();
1882   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1883 
1884   SDLoc DL(Op);
1885   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1886                                  getPointerTy(MF.getDataLayout()));
1887 
1888   // vastart just stores the address of the VarArgsFrameIndex slot into the
1889   // memory location argument.
1890   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1891   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1892                       MachinePointerInfo(SV));
1893 }
1894 
1895 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1896                                             SelectionDAG &DAG) const {
1897   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1898   MachineFunction &MF = DAG.getMachineFunction();
1899   MachineFrameInfo &MFI = MF.getFrameInfo();
1900   MFI.setFrameAddressIsTaken(true);
1901   Register FrameReg = RI.getFrameRegister(MF);
1902   int XLenInBytes = Subtarget.getXLen() / 8;
1903 
1904   EVT VT = Op.getValueType();
1905   SDLoc DL(Op);
1906   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1907   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1908   while (Depth--) {
1909     int Offset = -(XLenInBytes * 2);
1910     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1911                               DAG.getIntPtrConstant(Offset, DL));
1912     FrameAddr =
1913         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1914   }
1915   return FrameAddr;
1916 }
1917 
1918 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1919                                              SelectionDAG &DAG) const {
1920   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1921   MachineFunction &MF = DAG.getMachineFunction();
1922   MachineFrameInfo &MFI = MF.getFrameInfo();
1923   MFI.setReturnAddressIsTaken(true);
1924   MVT XLenVT = Subtarget.getXLenVT();
1925   int XLenInBytes = Subtarget.getXLen() / 8;
1926 
1927   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1928     return SDValue();
1929 
1930   EVT VT = Op.getValueType();
1931   SDLoc DL(Op);
1932   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1933   if (Depth) {
1934     int Off = -XLenInBytes;
1935     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1936     SDValue Offset = DAG.getConstant(Off, DL, VT);
1937     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1938                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1939                        MachinePointerInfo());
1940   }
1941 
1942   // Return the value of the return address register, marking it an implicit
1943   // live-in.
1944   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1945   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1946 }
1947 
1948 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1949                                                  SelectionDAG &DAG) const {
1950   SDLoc DL(Op);
1951   SDValue Lo = Op.getOperand(0);
1952   SDValue Hi = Op.getOperand(1);
1953   SDValue Shamt = Op.getOperand(2);
1954   EVT VT = Lo.getValueType();
1955 
1956   // if Shamt-XLEN < 0: // Shamt < XLEN
1957   //   Lo = Lo << Shamt
1958   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1959   // else:
1960   //   Lo = 0
1961   //   Hi = Lo << (Shamt-XLEN)
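  // The ((Lo >>u 1) >>u (XLEN-1 - Shamt)) form is used instead of
  // (Lo >>u (XLEN - Shamt)) to avoid an undefined shift by XLEN when Shamt
  // is 0.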
1962 
1963   SDValue Zero = DAG.getConstant(0, DL, VT);
1964   SDValue One = DAG.getConstant(1, DL, VT);
1965   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1966   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1967   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1968   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1969 
1970   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1971   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1972   SDValue ShiftRightLo =
1973       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1974   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1975   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1976   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1977 
1978   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1979 
1980   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1981   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1982 
1983   SDValue Parts[2] = {Lo, Hi};
1984   return DAG.getMergeValues(Parts, DL);
1985 }
1986 
1987 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1988                                                   bool IsSRA) const {
1989   SDLoc DL(Op);
1990   SDValue Lo = Op.getOperand(0);
1991   SDValue Hi = Op.getOperand(1);
1992   SDValue Shamt = Op.getOperand(2);
1993   EVT VT = Lo.getValueType();
1994 
1995   // SRA expansion:
1996   //   if Shamt-XLEN < 0: // Shamt < XLEN
1997   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1998   //     Hi = Hi >>s Shamt
1999   //   else:
2000   //     Lo = Hi >>s (Shamt-XLEN);
2001   //     Hi = Hi >>s (XLEN-1)
2002   //
2003   // SRL expansion:
2004   //   if Shamt-XLEN < 0: // Shamt < XLEN
2005   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2006   //     Hi = Hi >>u Shamt
2007   //   else:
2008   //     Lo = Hi >>u (Shamt-XLEN);
2009   //     Hi = 0;
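  // ((Hi << 1) << (XLEN-1 - Shamt)) is used rather than (Hi << (XLEN - Shamt))
  // to avoid an undefined shift by XLEN when Shamt is 0.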
2010 
2011   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2012 
2013   SDValue Zero = DAG.getConstant(0, DL, VT);
2014   SDValue One = DAG.getConstant(1, DL, VT);
2015   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2016   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2017   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2018   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2019 
2020   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2021   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2022   SDValue ShiftLeftHi =
2023       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
2024   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2025   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2026   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
2027   SDValue HiFalse =
2028       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
2029 
2030   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2031 
2032   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2033   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2034 
2035   SDValue Parts[2] = {Lo, Hi};
2036   return DAG.getMergeValues(Parts, DL);
2037 }
2038 
2039 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
2040 // illegal (currently only vXi64 RV32).
2041 // FIXME: We could also catch non-constant sign-extended i32 values and lower
2042 // them to SPLAT_VECTOR_I64
2043 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
2044                                               SelectionDAG &DAG) const {
2045   SDLoc DL(Op);
2046   EVT VecVT = Op.getValueType();
2047   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
2048          "Unexpected SPLAT_VECTOR lowering");
2049   SDValue SplatVal = Op.getOperand(0);
2050 
2051   // If we can prove that the value is a sign-extended 32-bit value, lower this
2052   // as a custom node in order to try and match RVV vector/scalar instructions.
2053   if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
2054     if (isInt<32>(CVal->getSExtValue()))
2055       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
2056                          DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
2057   }
2058 
2059   if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
2060       SplatVal.getOperand(0).getValueType() == MVT::i32) {
2061     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
2062                        SplatVal.getOperand(0));
2063   }
2064 
2065   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
2066   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
2067   // vmv.v.x vX, hi
2068   // vsll.vx vX, vX, /*32*/
2069   // vmv.v.x vY, lo
2070   // vsll.vx vY, vY, /*32*/
2071   // vsrl.vx vY, vY, /*32*/
2072   // vor.vv vX, vX, vY
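  // The shift left/right pair on vY zero-extends the low half, so the final
  // vor.vv does not see stray sign bits from lo in the upper 32 bits.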
2073   SDValue One = DAG.getConstant(1, DL, MVT::i32);
2074   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
2075   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
2076   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
2077   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
2078 
2079   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2080   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
2081   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
2082 
2083   if (isNullConstant(Hi))
2084     return Lo;
2085 
2086   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
2087   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
2088 
2089   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
2090 }
2091 
2092 // Custom-lower extensions from mask vectors by using a vselect either with 1
2093 // for zero/any-extension or -1 for sign-extension:
2094 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
2095 // Note that any-extension is lowered identically to zero-extension.
2096 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2097                                                 int64_t ExtTrueVal) const {
2098   SDLoc DL(Op);
2099   MVT VecVT = Op.getSimpleValueType();
2100   SDValue Src = Op.getOperand(0);
2101   // Only custom-lower extensions from mask types
2102   assert(Src.getValueType().isVector() &&
2103          Src.getValueType().getVectorElementType() == MVT::i1);
2104 
2105   MVT XLenVT = Subtarget.getXLenVT();
2106   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2107   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2108 
2109   if (VecVT.isScalableVector()) {
2110     // Be careful not to introduce illegal scalar types at this stage, and be
2111     // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2112     // illegal and must be expanded. Since we know that the constants are
2113     // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
2114     bool IsRV32E64 =
2115         !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2116 
2117     if (!IsRV32E64) {
2118       SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2119       SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2120     } else {
2121       SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2122       SplatTrueVal =
2123           DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2124     }
2125 
2126     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2127   }
2128 
2129   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2130   MVT I1ContainerVT =
2131       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2132 
2133   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2134 
2135   SDValue Mask, VL;
2136   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2137 
2138   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2139   SplatTrueVal =
2140       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2141   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2142                                SplatTrueVal, SplatZero, VL);
2143 
2144   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2145 }
2146 
2147 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2148     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2149   MVT ExtVT = Op.getSimpleValueType();
2150   // Only custom-lower extensions from fixed-length vector types.
2151   if (!ExtVT.isFixedLengthVector())
2152     return Op;
2153   MVT VT = Op.getOperand(0).getSimpleValueType();
2154   // Grab the canonical container type for the extended type. Infer the smaller
2155   // type from that to ensure the same number of vector elements, as we know
2156   // the LMUL will be sufficient to hold the smaller type.
2157   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2158   // Build the source container type manually so that it has the same number
2159   // of vector elements as the extended container type.
2160   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
2161                                      ContainerExtVT.getVectorElementCount());
2162 
2163   SDValue Op1 =
2164       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2165 
2166   SDLoc DL(Op);
2167   SDValue Mask, VL;
2168   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2169 
2170   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
2171 
2172   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
2173 }
2174 
2175 // Custom-lower truncations from vectors to mask vectors by using a mask and a
2176 // setcc operation:
2177 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
2178 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
2179                                                   SelectionDAG &DAG) const {
2180   SDLoc DL(Op);
2181   EVT MaskVT = Op.getValueType();
2182   // Only expect to custom-lower truncations to mask types
2183   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
2184          "Unexpected type for vector mask lowering");
2185   SDValue Src = Op.getOperand(0);
2186   MVT VecVT = Src.getSimpleValueType();
2187 
2188   // If this is a fixed vector, we need to convert it to a scalable vector.
2189   MVT ContainerVT = VecVT;
2190   if (VecVT.isFixedLengthVector()) {
2191     ContainerVT = getContainerForFixedLengthVector(VecVT);
2192     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2193   }
2194 
2195   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
2196   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2197 
2198   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
2199   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
2200 
2201   if (VecVT.isScalableVector()) {
2202     SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
2203     return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
2204   }
2205 
2206   SDValue Mask, VL;
2207   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2208 
2209   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2210   SDValue Trunc =
2211       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
2212   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
2213                       DAG.getCondCode(ISD::SETNE), Mask, VL);
2214   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
2215 }
2216 
2217 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2218                                                     SelectionDAG &DAG) const {
2219   SDLoc DL(Op);
2220   MVT VecVT = Op.getSimpleValueType();
2221   SDValue Vec = Op.getOperand(0);
2222   SDValue Val = Op.getOperand(1);
2223   SDValue Idx = Op.getOperand(2);
2224 
2225   MVT ContainerVT = VecVT;
2226   // If the operand is a fixed-length vector, convert to a scalable one.
2227   if (VecVT.isFixedLengthVector()) {
2228     ContainerVT = getContainerForFixedLengthVector(VecVT);
2229     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2230   }
2231 
2232   SDValue Mask, VL;
2233   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2234 
2235   // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
2236   // first slid down into position, the value is inserted into the first
2237   // position, and the vector is slid back up. We do this to simplify patterns.
2238   //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
2239   if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
2240     if (isNullConstant(Idx))
2241       return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
2242     SDValue Slidedown =
2243         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2244                     DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2245     SDValue InsertElt0 =
2246         DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL);
2247     return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0,
2248                        Idx, Mask, VL);
2249   }
2250 
2251   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
2252   // is illegal (currently only vXi64 RV32).
2253   // Since there is no easy way of getting a single element into a vector when
2254   // XLEN<SEW, we lower the operation to the following sequence:
2255   //   splat      vVal, rVal
2256   //   vid.v      vVid
2257   //   vmseq.vx   mMask, vVid, rIdx
2258   //   vmerge.vvm vDest, vSrc, vVal, mMask
2259   // This essentially merges the original vector with the inserted element by
2260   // using a mask whose only set bit is that corresponding to the insert
2261   // index.
2262   SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val);
2263   SDValue SplattedIdx =
2264       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL);
2265 
2266   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2267   auto SetCCVT =
2268       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT);
2269   SDValue SelectCond =
2270       DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx,
2271                   DAG.getCondCode(ISD::SETEQ), Mask, VL);
2272   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT,
2273                                SelectCond, SplattedVal, Vec, VL);
2274   if (!VecVT.isFixedLengthVector())
2275     return Select;
2276   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2277 }
2278 
2279 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2280 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2281 // types this is done using VMV_X_S to allow us to glean information about the
2282 // sign bits of the result.
2283 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2284                                                      SelectionDAG &DAG) const {
2285   SDLoc DL(Op);
2286   SDValue Idx = Op.getOperand(1);
2287   SDValue Vec = Op.getOperand(0);
2288   EVT EltVT = Op.getValueType();
2289   MVT VecVT = Vec.getSimpleValueType();
2290   MVT XLenVT = Subtarget.getXLenVT();
2291 
2292   // If this is a fixed vector, we need to convert it to a scalable vector.
2293   MVT ContainerVT = VecVT;
2294   if (VecVT.isFixedLengthVector()) {
2295     ContainerVT = getContainerForFixedLengthVector(VecVT);
2296     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2297   }
2298 
2299   // If the index is 0, the vector is already in the right position.
2300   if (!isNullConstant(Idx)) {
2301     // Use a VL of 1 to avoid processing more elements than we need.
2302     SDValue VL = DAG.getConstant(1, DL, XLenVT);
2303     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2304     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2305     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2306                       DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2307   }
2308 
2309   if (!EltVT.isInteger()) {
2310     // Floating-point extracts are handled in TableGen.
2311     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2312                        DAG.getConstant(0, DL, XLenVT));
2313   }
2314 
2315   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2316   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2317 }
2318 
2319 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2320                                                      SelectionDAG &DAG) const {
2321   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2322   SDLoc DL(Op);
2323 
2324   if (Subtarget.hasStdExtV()) {
2325     // Some RVV intrinsics may claim that they want an integer operand to be
2326     // extended.
2327     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2328             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
2329       if (II->ExtendedOperand) {
2330         assert(II->ExtendedOperand < Op.getNumOperands());
2331         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2332         SDValue &ScalarOp = Operands[II->ExtendedOperand];
2333         EVT OpVT = ScalarOp.getValueType();
2334         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
2335             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
2336           // If the operand is a constant, sign extend to increase our chances
2337           // of being able to use a .vi instruction. ANY_EXTEND would become a
2338           // zero extend and the simm5 check in isel would fail.
2339           // FIXME: Should we ignore the upper bits in isel instead?
2340           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
2341                                                           : ISD::ANY_EXTEND;
2342           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
2343           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
2344                              Operands);
2345         }
2346       }
2347     }
2348   }
2349 
2350   switch (IntNo) {
2351   default:
2352     return SDValue();    // Don't custom lower most intrinsics.
2353   case Intrinsic::thread_pointer: {
2354     EVT PtrVT = getPointerTy(DAG.getDataLayout());
2355     return DAG.getRegister(RISCV::X4, PtrVT);
2356   }
2357   case Intrinsic::riscv_vmv_x_s:
2358     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
2359     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
2360                        Op.getOperand(1));
2361   case Intrinsic::riscv_vmv_v_x: {
2362     SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
2363                                  Op.getOperand(1));
2364     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
2365                        Scalar, Op.getOperand(2));
2366   }
2367   case Intrinsic::riscv_vfmv_v_f:
2368     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
2369                        Op.getOperand(1), Op.getOperand(2));
2370   }
2371 }
2372 
2373 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
2374                                                     SelectionDAG &DAG) const {
2375   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2376   SDLoc DL(Op);
2377 
2378   if (Subtarget.hasStdExtV()) {
2379     // Some RVV intrinsics may claim that they want an integer operand to be
2380     // extended.
2381     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2382             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
2383       if (II->ExtendedOperand) {
2384         // The operands start from the second argument in INTRINSIC_W_CHAIN.
2385         unsigned ExtendOp = II->ExtendedOperand + 1;
2386         assert(ExtendOp < Op.getNumOperands());
2387         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2388         SDValue &ScalarOp = Operands[ExtendOp];
2389         EVT OpVT = ScalarOp.getValueType();
2390         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
2391             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
2392           // If the operand is a constant, sign extend to increase our chances
2393           // of being able to use a .vi instruction. ANY_EXTEND would become a
2394           // zero extend and the simm5 check in isel would fail.
2395           // FIXME: Should we ignore the upper bits in isel instead?
2396           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
2397                                                           : ISD::ANY_EXTEND;
2398           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
2399           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
2400                              Operands);
2401         }
2402       }
2403     }
2404   }
2405 
2406   return SDValue(); // Don't custom lower most intrinsics.
2407 }
2408 
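// Return a scalable vector type with the same element type as VT but with a
// known minimum size of exactly one vector register (RVVBitsPerBlock bits),
// i.e. an LMUL=1 type.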
2409 static MVT getLMUL1VT(MVT VT) {
2410   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
2411          "Unexpected vector MVT");
2412   return MVT::getScalableVectorVT(
2413       VT.getVectorElementType(),
2414       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
2415 }
2416 
2417 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
2418   switch (ISDOpcode) {
2419   default:
2420     llvm_unreachable("Unhandled reduction");
2421   case ISD::VECREDUCE_ADD:
2422     return RISCVISD::VECREDUCE_ADD_VL;
2423   case ISD::VECREDUCE_UMAX:
2424     return RISCVISD::VECREDUCE_UMAX_VL;
2425   case ISD::VECREDUCE_SMAX:
2426     return RISCVISD::VECREDUCE_SMAX_VL;
2427   case ISD::VECREDUCE_UMIN:
2428     return RISCVISD::VECREDUCE_UMIN_VL;
2429   case ISD::VECREDUCE_SMIN:
2430     return RISCVISD::VECREDUCE_SMIN_VL;
2431   case ISD::VECREDUCE_AND:
2432     return RISCVISD::VECREDUCE_AND_VL;
2433   case ISD::VECREDUCE_OR:
2434     return RISCVISD::VECREDUCE_OR_VL;
2435   case ISD::VECREDUCE_XOR:
2436     return RISCVISD::VECREDUCE_XOR_VL;
2437   }
2438 }
2439 
2440 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV
2441 // reduction opcode. Note that the RVV reduction node produces a vector result,
2442 // which must be further processed to access the scalar result in element 0.
2443 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
2444                                             SelectionDAG &DAG) const {
2445   SDLoc DL(Op);
2446   MVT VecVT = Op.getOperand(0).getSimpleValueType();
2447   MVT VecEltVT = VecVT.getVectorElementType();
2448 
2449   // Avoid creating vectors with illegal type.
2450   if (!isTypeLegal(VecVT))
2451     return SDValue();
2452 
2453   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
2454 
2455   SDValue Vec = Op.getOperand(0);
2456 
2457   MVT ContainerVT = VecVT;
2458   if (VecVT.isFixedLengthVector()) {
2459     ContainerVT = getContainerForFixedLengthVector(VecVT);
2460     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2461   }
2462 
2463   MVT M1VT = getLMUL1VT(ContainerVT);
2464 
2465   SDValue Mask, VL;
2466   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2467 
2468   // FIXME: This is a VLMAX splat which might be too large and can prevent
2469   // vsetvli removal.
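  // The RVV reduction takes its start value from element 0 of a vector
  // operand, so splat the operation's neutral element (e.g. 0 for add,
  // all-ones for and) so that it does not perturb the result.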
2470   SDValue NeutralElem = DAG.getNeutralElement(
2471       ISD::getVecReduceBaseOpcode(Op.getOpcode()), DL, VecEltVT, SDNodeFlags());
2472   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
2473   SDValue Reduction =
2474       DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
2475   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2476                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2477   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
2478 }
2479 
2480 // Given a reduction op, this function returns the matching reduction opcode,
2481 // the vector SDValue and the scalar SDValue required to lower this to a
2482 // RISCVISD node.
2483 static std::tuple<unsigned, SDValue, SDValue>
2484 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
2485   SDLoc DL(Op);
2486   switch (Op.getOpcode()) {
2487   default:
2488     llvm_unreachable("Unhandled reduction");
2489   case ISD::VECREDUCE_FADD:
2490     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
2491                            DAG.getConstantFP(0.0, DL, EltVT));
2492   case ISD::VECREDUCE_SEQ_FADD:
2493     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
2494                            Op.getOperand(0));
2495   }
2496 }
2497 
2498 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
2499                                               SelectionDAG &DAG) const {
2500   SDLoc DL(Op);
2501   MVT VecEltVT = Op.getSimpleValueType();
2502 
2503   unsigned RVVOpcode;
2504   SDValue VectorVal, ScalarVal;
2505   std::tie(RVVOpcode, VectorVal, ScalarVal) =
2506       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
2507   MVT VecVT = VectorVal.getSimpleValueType();
2508 
2509   MVT ContainerVT = VecVT;
2510   if (VecVT.isFixedLengthVector()) {
2511     ContainerVT = getContainerForFixedLengthVector(VecVT);
2512     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
2513   }
2514 
2515   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
2516 
2517   SDValue Mask, VL;
2518   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2519 
2520   // FIXME: This is a VLMAX splat which might be too large and can prevent
2521   // vsetvli removal.
2522   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
2523   SDValue Reduction =
2524       DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
2525   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2526                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2527 }
2528 
2529 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
2530                                                    SelectionDAG &DAG) const {
2531   SDValue Vec = Op.getOperand(0);
2532   SDValue SubVec = Op.getOperand(1);
2533   MVT VecVT = Vec.getSimpleValueType();
2534   MVT SubVecVT = SubVec.getSimpleValueType();
2535 
2536   SDLoc DL(Op);
2537   MVT XLenVT = Subtarget.getXLenVT();
2538   unsigned OrigIdx = Op.getConstantOperandVal(2);
2539   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2540 
2541   // We don't have the ability to slide mask vectors up indexed by their i1
2542   // elements; the smallest we can do is i8. Often we are able to bitcast to
2543   // equivalent i8 vectors. Note that when inserting a fixed-length vector
2544   // into a scalable one, we might not necessarily have enough scalable
2545   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
2546   if (SubVecVT.getVectorElementType() == MVT::i1 &&
2547       (OrigIdx != 0 || !Vec.isUndef())) {
2548     if (VecVT.getVectorMinNumElements() >= 8 &&
2549         SubVecVT.getVectorMinNumElements() >= 8) {
2550       assert(OrigIdx % 8 == 0 && "Invalid index");
2551       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
2552              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
2553              "Unexpected mask vector lowering");
2554       OrigIdx /= 8;
2555       SubVecVT =
2556           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
2557                            SubVecVT.isScalableVector());
2558       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
2559                                VecVT.isScalableVector());
2560       Vec = DAG.getBitcast(VecVT, Vec);
2561       SubVec = DAG.getBitcast(SubVecVT, SubVec);
2562     } else {
2563       // We can't slide this mask vector up indexed by its i1 elements.
2564       // This poses a problem when we wish to insert a scalable vector which
2565       // can't be re-expressed as a larger type. Just choose the slow path and
2566       // extend to a larger type, then truncate back down.
2567       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
2568       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
2569       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
2570       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
2571       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
2572                         Op.getOperand(2));
2573       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
2574       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
2575     }
2576   }
2577 
  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of an
  // LMUL group contains the specific subvector as we only know the minimum
2581   // register size. Therefore we must slide the vector group up the full
2582   // amount.
2583   if (SubVecVT.isFixedLengthVector()) {
2584     if (OrigIdx == 0 && Vec.isUndef())
2585       return Op;
2586     MVT ContainerVT = VecVT;
2587     if (VecVT.isFixedLengthVector()) {
2588       ContainerVT = getContainerForFixedLengthVector(VecVT);
2589       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2590     }
2591     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
2592                          DAG.getUNDEF(ContainerVT), SubVec,
2593                          DAG.getConstant(0, DL, XLenVT));
2594     SDValue Mask =
2595         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
2596     // Set the vector length to only the number of elements we care about. Note
2597     // that for slideup this includes the offset.
2598     SDValue VL =
2599         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
2600     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
2601     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
2602                                   SubVec, SlideupAmt, Mask, VL);
2603     if (!VecVT.isFixedLengthVector())
2604       return Slideup;
2605     return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
2606   }
2607 
2608   unsigned SubRegIdx, RemIdx;
2609   std::tie(SubRegIdx, RemIdx) =
2610       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2611           VecVT, SubVecVT, OrigIdx, TRI);
2612 
2613   RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
2614   bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
2615                          SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
2616                          SubVecLMUL == RISCVVLMUL::LMUL_F8;
2617 
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // that of a vector register or a multiple thereof, or the surrounding
  // elements are undef, then this is a subvector insert which naturally aligns
  // to a vector register. These can easily be handled using subregister
  // manipulation.
2622   // 2. If the subvector is smaller than a vector register, then the insertion
2623   // must preserve the undisturbed elements of the register. We do this by
2624   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
2625   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
2626   // subvector within the vector register, and an INSERT_SUBVECTOR of that
2627   // LMUL=1 type back into the larger vector (resolving to another subregister
2628   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
2629   // to avoid allocating a large register group to hold our subvector.
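  // For example (illustrative): inserting nxv2i32 at index 2 into nxv4i32
  // lands exactly on the second LMUL=1 register of the group and is left to
  // subregister manipulation, whereas inserting nxv1i32 at index 1 leaves a
  // non-zero RemIdx and takes the slide-based path below.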
2630   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
2631     return Op;
2632 
  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
2635   // (in our case undisturbed). This means we can set up a subvector insertion
2636   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
2637   // size of the subvector.
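  // For example (illustrative): with OFFSET=4 and a 2-element subvector, VL is
  // set to 6 so that elements 0..3 are left untouched, elements 4..5 receive
  // the subvector, and everything at or beyond VL falls into the (undisturbed)
  // tail.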
2638   MVT InterSubVT = VecVT;
2639   SDValue AlignedExtract = Vec;
2640   unsigned AlignedIdx = OrigIdx - RemIdx;
2641   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
2642     InterSubVT = getLMUL1VT(VecVT);
2643     // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to an EXTRACT_SUBREG instruction.
2645     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
2646                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
2647   }
2648 
2649   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
2650   // For scalable vectors this must be further multiplied by vscale.
2651   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
2652 
2653   SDValue Mask, VL;
2654   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2655 
2656   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
2657   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
2658   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
2659   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
2660 
2661   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
2662                        DAG.getUNDEF(InterSubVT), SubVec,
2663                        DAG.getConstant(0, DL, XLenVT));
2664 
2665   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
2666                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
2667 
2668   // If required, insert this subvector back into the correct vector register.
2669   // This should resolve to an INSERT_SUBREG instruction.
2670   if (VecVT.bitsGT(InterSubVT))
2671     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
2672                           DAG.getConstant(AlignedIdx, DL, XLenVT));
2673 
2674   // We might have bitcast from a mask type: cast back to the original type if
2675   // required.
2676   return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
2677 }
2678 
2679 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
2680                                                     SelectionDAG &DAG) const {
2681   SDValue Vec = Op.getOperand(0);
2682   MVT SubVecVT = Op.getSimpleValueType();
2683   MVT VecVT = Vec.getSimpleValueType();
2684 
2685   SDLoc DL(Op);
2686   MVT XLenVT = Subtarget.getXLenVT();
2687   unsigned OrigIdx = Op.getConstantOperandVal(1);
2688   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2689 
2690   // We don't have the ability to slide mask vectors down indexed by their i1
2691   // elements; the smallest we can do is i8. Often we are able to bitcast to
2692   // equivalent i8 vectors. Note that when extracting a fixed-length vector
2693   // from a scalable one, we might not necessarily have enough scalable
2694   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
2695   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
2696     if (VecVT.getVectorMinNumElements() >= 8 &&
2697         SubVecVT.getVectorMinNumElements() >= 8) {
2698       assert(OrigIdx % 8 == 0 && "Invalid index");
2699       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
2700              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
2701              "Unexpected mask vector lowering");
2702       OrigIdx /= 8;
2703       SubVecVT =
2704           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
2705                            SubVecVT.isScalableVector());
2706       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
2707                                VecVT.isScalableVector());
2708       Vec = DAG.getBitcast(VecVT, Vec);
2709     } else {
      // We can't slide this mask vector down indexed by its i1 elements.
2711       // This poses a problem when we wish to extract a scalable vector which
2712       // can't be re-expressed as a larger type. Just choose the slow path and
2713       // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting a fixed-length
      // vector from a fixed-length one, where we can extract as i8 and shift
      // the correct element right to reach the desired subvector.
2717       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
2718       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
2719       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
2720       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
2721                         Op.getOperand(1));
2722       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
2723       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
2724     }
2725   }
2726 
  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of an
  // LMUL group contains the specific subvector as we only know the minimum
2730   // register size. Therefore we must slide the vector group down the full
2731   // amount.
2732   if (SubVecVT.isFixedLengthVector()) {
2733     // With an index of 0 this is a cast-like subvector, which can be performed
2734     // with subregister operations.
2735     if (OrigIdx == 0)
2736       return Op;
2737     MVT ContainerVT = VecVT;
2738     if (VecVT.isFixedLengthVector()) {
2739       ContainerVT = getContainerForFixedLengthVector(VecVT);
2740       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2741     }
2742     SDValue Mask =
2743         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
2744     // Set the vector length to only the number of elements we care about. This
2745     // avoids sliding down elements we're going to discard straight away.
2746     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
2747     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
2748     SDValue Slidedown =
2749         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2750                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
2751     // Now we can use a cast-like subvector extract to get the result.
2752     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2753                        DAG.getConstant(0, DL, XLenVT));
2754   }
2755 
2756   unsigned SubRegIdx, RemIdx;
2757   std::tie(SubRegIdx, RemIdx) =
2758       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2759           VecVT, SubVecVT, OrigIdx, TRI);
2760 
2761   // If the Idx has been completely eliminated then this is a subvector extract
2762   // which naturally aligns to a vector register. These can easily be handled
2763   // using subregister manipulation.
2764   if (RemIdx == 0)
2765     return Op;
2766 
2767   // Else we must shift our vector register directly to extract the subvector.
2768   // Do this using VSLIDEDOWN.
2769 
2770   // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
2772   // instruction.
2773   MVT InterSubVT = VecVT;
2774   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
2775     InterSubVT = getLMUL1VT(VecVT);
2776     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
2777                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
2778   }
2779 
2780   // Slide this vector register down by the desired number of elements in order
2781   // to place the desired subvector starting at element 0.
2782   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
2783   // For scalable vectors this must be further multiplied by vscale.
2784   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
2785 
2786   SDValue Mask, VL;
2787   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
2788   SDValue Slidedown =
2789       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
2790                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
2791 
2792   // Now the vector is in the right position, extract our final subvector. This
2793   // should resolve to a COPY.
2794   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2795                           DAG.getConstant(0, DL, XLenVT));
2796 
2797   // We might have bitcast from a mask type: cast back to the original type if
2798   // required.
2799   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
2800 }
2801 
2802 // Implement vector_reverse using vrgather.vv with indices determined by
2803 // subtracting the id of each element from (VLMAX-1). This will convert
2804 // the indices like so:
2805 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
2806 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
2807 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
2808                                                  SelectionDAG &DAG) const {
2809   SDLoc DL(Op);
2810   MVT VecVT = Op.getSimpleValueType();
2811   unsigned EltSize = VecVT.getScalarSizeInBits();
2812   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2813 
2814   unsigned MaxVLMAX = 0;
2815   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
2816   if (VectorBitsMax != 0)
2817     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
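  // For instance (illustrative numbers): with VectorBitsMax=512, SEW=8 and an
  // LMUL=8 type (MinSize=512), the computation above yields
  // MaxVLMAX = ((512 / 8) * 512) / 64 = 512.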
2818 
2819   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
2820   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
2821 
2822   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
2823   // to use vrgatherei16.vv.
2824   // TODO: It's also possible to use vrgatherei16.vv for other types to
2825   // decrease register width for the index calculation.
2826   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
2828     // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that, after splitting, VLMAX no longer
    // requires vrgatherei16.vv.
2831     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
2832       SDValue Lo, Hi;
2833       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
2834       EVT LoVT, HiVT;
2835       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
2836       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
2837       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
2838       // Reassemble the low and high pieces reversed.
2839       // FIXME: This is a CONCAT_VECTORS.
2840       SDValue Res =
2841           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
2842                       DAG.getIntPtrConstant(0, DL));
2843       return DAG.getNode(
2844           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
2845           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
2846     }
2847 
2848     // Just promote the int type to i16 which will double the LMUL.
2849     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
2850     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
2851   }
2852 
2853   MVT XLenVT = Subtarget.getXLenVT();
2854   SDValue Mask, VL;
2855   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
2856 
2857   // Calculate VLMAX-1 for the desired SEW.
2858   unsigned MinElts = VecVT.getVectorMinNumElements();
2859   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
2860                               DAG.getConstant(MinElts, DL, XLenVT));
2861   SDValue VLMinus1 =
2862       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
2863 
2864   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
2865   bool IsRV32E64 =
2866       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
2867   SDValue SplatVL;
2868   if (!IsRV32E64)
2869     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
2870   else
2871     SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
2872 
2873   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
2874   SDValue Indices =
2875       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
2876 
2877   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
2878 }
2879 
2880 SDValue
2881 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
2882                                                      SelectionDAG &DAG) const {
2883   auto *Load = cast<LoadSDNode>(Op);
2884 
2885   SDLoc DL(Op);
2886   MVT VT = Op.getSimpleValueType();
2887   MVT ContainerVT = getContainerForFixedLengthVector(VT);
2888 
2889   SDValue VL =
2890       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2891 
2892   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2893   SDValue NewLoad = DAG.getMemIntrinsicNode(
2894       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
2895       Load->getMemoryVT(), Load->getMemOperand());
2896 
2897   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2898   return DAG.getMergeValues({Result, Load->getChain()}, DL);
2899 }
2900 
2901 SDValue
2902 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
2903                                                       SelectionDAG &DAG) const {
2904   auto *Store = cast<StoreSDNode>(Op);
2905 
2906   SDLoc DL(Op);
2907   MVT VT = Store->getValue().getSimpleValueType();
2908 
2909   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
2910   // This is tricky to do.
2911 
2912   MVT ContainerVT = getContainerForFixedLengthVector(VT);
2913 
2914   SDValue VL =
2915       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2916 
2917   SDValue NewValue =
2918       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
2919   return DAG.getMemIntrinsicNode(
2920       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
2921       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
2922       Store->getMemoryVT(), Store->getMemOperand());
2923 }
2924 
2925 SDValue
2926 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
2927                                                       SelectionDAG &DAG) const {
2928   MVT InVT = Op.getOperand(0).getSimpleValueType();
2929   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
2930 
2931   MVT VT = Op.getSimpleValueType();
2932 
2933   SDValue Op1 =
2934       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2935   SDValue Op2 =
2936       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
2937 
2938   SDLoc DL(Op);
2939   SDValue VL =
2940       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2941 
2942   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2943 
2944   bool Invert = false;
2945   Optional<unsigned> LogicOpc;
2946   if (ContainerVT.isFloatingPoint()) {
2947     bool Swap = false;
2948     switch (CC) {
2949     default:
2950       break;
2951     case ISD::SETULE:
2952     case ISD::SETULT:
2953       Swap = true;
2954       LLVM_FALLTHROUGH;
2955     case ISD::SETUGE:
2956     case ISD::SETUGT:
2957       CC = getSetCCInverse(CC, ContainerVT);
2958       Invert = true;
2959       break;
2960     case ISD::SETOGE:
2961     case ISD::SETOGT:
2962     case ISD::SETGE:
2963     case ISD::SETGT:
2964       Swap = true;
2965       break;
2966     case ISD::SETUEQ:
2967       // Use !((OLT Op1, Op2) || (OLT Op2, Op1))
2968       Invert = true;
2969       LogicOpc = RISCVISD::VMOR_VL;
2970       CC = ISD::SETOLT;
2971       break;
2972     case ISD::SETONE:
2973       // Use ((OLT Op1, Op2) || (OLT Op2, Op1))
2974       LogicOpc = RISCVISD::VMOR_VL;
2975       CC = ISD::SETOLT;
2976       break;
2977     case ISD::SETO:
2978       // Use (OEQ Op1, Op1) && (OEQ Op2, Op2)
2979       LogicOpc = RISCVISD::VMAND_VL;
2980       CC = ISD::SETOEQ;
2981       break;
2982     case ISD::SETUO:
2983       // Use (UNE Op1, Op1) || (UNE Op2, Op2)
2984       LogicOpc = RISCVISD::VMOR_VL;
2985       CC = ISD::SETUNE;
2986       break;
2987     }
2988 
2989     if (Swap) {
2990       CC = getSetCCSwappedOperands(CC);
2991       std::swap(Op1, Op2);
2992     }
2993   }
2994 
2995   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2996   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2997 
2998   // There are 3 cases we need to emit.
2999   // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2)
3000   //    we need to compare each operand with itself.
3001   // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in
3002   //    both orders.
3003   // 3. For any other case we just need one compare with Op1 and Op2.
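  // For instance (illustrative): SETONE falls under case 2 and is emitted as
  // two SETCC_VL nodes computing (OLT Op1, Op2) and (OLT Op2, Op1), joined by
  // a VMOR_VL of the two mask results.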
3004   SDValue Cmp;
3005   if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) {
3006     Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1,
3007                       DAG.getCondCode(CC), Mask, VL);
3008     SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2,
3009                                DAG.getCondCode(CC), Mask, VL);
3010     Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
3011   } else {
3012     Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
3013                       DAG.getCondCode(CC), Mask, VL);
3014     if (LogicOpc) {
3015       SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1,
3016                                  DAG.getCondCode(CC), Mask, VL);
3017       Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL);
3018     }
3019   }
3020 
3021   if (Invert) {
3022     SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3023     Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL);
3024   }
3025 
3026   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
3027 }
3028 
3029 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
3030     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
3031   MVT VT = Op.getSimpleValueType();
3032 
3033   if (VT.getVectorElementType() == MVT::i1)
3034     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
3035 
3036   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
3037 }
3038 
3039 // Lower vector ABS to smax(X, sub(0, X)).
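// As a scalar analogy (illustrative): abs(-3) = smax(-3, 0 - (-3)) = 3.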
3040 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
3041   SDLoc DL(Op);
3042   MVT VT = Op.getSimpleValueType();
3043   SDValue X = Op.getOperand(0);
3044 
3045   // For scalable vectors we just need to deal with i64 on RV32 since the
3046   // default expansion crashes in getConstant.
3047   if (VT.isScalableVector()) {
3048     assert(!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64 &&
3049            "Unexpected custom lowering!");
3050     SDValue SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT,
3051                                     DAG.getConstant(0, DL, MVT::i32));
3052     SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, SplatZero, X);
3053     return DAG.getNode(ISD::SMAX, DL, VT, X, NegX);
3054   }
3055 
3056   assert(VT.isFixedLengthVector() && "Unexpected type");
3057 
3058   MVT ContainerVT =
3059       RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
3060   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
3061 
3062   SDValue Mask, VL;
3063   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3064 
3065   SDValue SplatZero =
3066       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3067                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3068   SDValue NegX =
3069       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
3070   SDValue Max =
3071       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
3072 
3073   return convertFromScalableVector(VT, Max, DAG, Subtarget);
3074 }
3075 
3076 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
3077     SDValue Op, SelectionDAG &DAG) const {
3078   MVT VT = Op.getSimpleValueType();
3079   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3080 
3081   MVT I1ContainerVT =
3082       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3083 
3084   SDValue CC =
3085       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
3086   SDValue Op1 =
3087       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3088   SDValue Op2 =
3089       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
3090 
3091   SDLoc DL(Op);
3092   SDValue Mask, VL;
3093   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3094 
3095   SDValue Select =
3096       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3097 
3098   return convertFromScalableVector(VT, Select, DAG, Subtarget);
3099 }
3100 
3101 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3102                                                unsigned NewOpc,
3103                                                bool HasMask) const {
3104   MVT VT = Op.getSimpleValueType();
3105   assert(useRVVForFixedLengthVectorVT(VT) &&
3106          "Only expected to lower fixed length vector operation!");
3107   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3108 
3109   // Create list of operands by converting existing ones to scalable types.
3110   SmallVector<SDValue, 6> Ops;
3111   for (const SDValue &V : Op->op_values()) {
3112     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3113 
3114     // Pass through non-vector operands.
3115     if (!V.getValueType().isVector()) {
3116       Ops.push_back(V);
3117       continue;
3118     }
3119 
3120     // "cast" fixed length vector to a scalable vector.
3121     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3122            "Only fixed length vectors are supported!");
3123     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3124   }
3125 
3126   SDLoc DL(Op);
3127   SDValue Mask, VL;
3128   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3129   if (HasMask)
3130     Ops.push_back(Mask);
3131   Ops.push_back(VL);
3132 
3133   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3134   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3135 }
3136 
3137 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3138 // form of the given Opcode.
3139 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3140   switch (Opcode) {
3141   default:
3142     llvm_unreachable("Unexpected opcode");
3143   case ISD::SHL:
3144     return RISCVISD::SLLW;
3145   case ISD::SRA:
3146     return RISCVISD::SRAW;
3147   case ISD::SRL:
3148     return RISCVISD::SRLW;
3149   case ISD::SDIV:
3150     return RISCVISD::DIVW;
3151   case ISD::UDIV:
3152     return RISCVISD::DIVUW;
3153   case ISD::UREM:
3154     return RISCVISD::REMUW;
3155   case ISD::ROTL:
3156     return RISCVISD::ROLW;
3157   case ISD::ROTR:
3158     return RISCVISD::RORW;
3159   case RISCVISD::GREVI:
3160     return RISCVISD::GREVIW;
3161   case RISCVISD::GORCI:
3162     return RISCVISD::GORCIW;
3163   }
3164 }
3165 
3166 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3167 // Because i32 isn't a legal type for RV64, these operations would otherwise
3168 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact that the operation was originally of type i32 is
// lost.
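// For example (illustrative): an i32 (shl x, y) on RV64 becomes
// (trunc (SLLW (any_extend x), (any_extend y))), letting instruction selection
// pick SLLW rather than a 64-bit shift plus extra extension code.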
3171 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3172                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3173   SDLoc DL(N);
3174   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3175   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3176   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3177   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3178   // ReplaceNodeResults requires we maintain the same type for the return value.
3179   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3180 }
3181 
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics so that extra sign-extension instructions can be avoided.
3184 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3185   SDLoc DL(N);
3186   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3187   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3188   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3189   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3190                                DAG.getValueType(MVT::i32));
3191   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3192 }
3193 
3194 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
3195                                              SmallVectorImpl<SDValue> &Results,
3196                                              SelectionDAG &DAG) const {
3197   SDLoc DL(N);
3198   switch (N->getOpcode()) {
3199   default:
3200     llvm_unreachable("Don't know how to custom type legalize this operation!");
3201   case ISD::STRICT_FP_TO_SINT:
3202   case ISD::STRICT_FP_TO_UINT:
3203   case ISD::FP_TO_SINT:
3204   case ISD::FP_TO_UINT: {
3205     bool IsStrict = N->isStrictFPOpcode();
3206     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3207            "Unexpected custom legalisation");
3208     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
3209     // If the FP type needs to be softened, emit a library call using the 'si'
3210     // version. If we left it to default legalization we'd end up with 'di'. If
3211     // the FP type doesn't need to be softened just let generic type
3212     // legalization promote the result type.
3213     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
3214         TargetLowering::TypeSoftenFloat)
3215       return;
3216     RTLIB::Libcall LC;
3217     if (N->getOpcode() == ISD::FP_TO_SINT ||
3218         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
3219       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
3220     else
3221       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
3222     MakeLibCallOptions CallOptions;
3223     EVT OpVT = Op0.getValueType();
3224     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
3225     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
3226     SDValue Result;
3227     std::tie(Result, Chain) =
3228         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
3229     Results.push_back(Result);
3230     if (IsStrict)
3231       Results.push_back(Chain);
3232     break;
3233   }
3234   case ISD::READCYCLECOUNTER: {
3235     assert(!Subtarget.is64Bit() &&
3236            "READCYCLECOUNTER only has custom type legalization on riscv32");
3237 
3238     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
3239     SDValue RCW =
3240         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
3241 
3242     Results.push_back(
3243         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
3244     Results.push_back(RCW.getValue(2));
3245     break;
3246   }
3247   case ISD::ADD:
3248   case ISD::SUB:
3249   case ISD::MUL:
3250     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3251            "Unexpected custom legalisation");
3252     if (N->getOperand(1).getOpcode() == ISD::Constant)
3253       return;
3254     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
3255     break;
3256   case ISD::SHL:
3257   case ISD::SRA:
3258   case ISD::SRL:
3259     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3260            "Unexpected custom legalisation");
3261     if (N->getOperand(1).getOpcode() == ISD::Constant)
3262       return;
3263     Results.push_back(customLegalizeToWOp(N, DAG));
3264     break;
3265   case ISD::ROTL:
3266   case ISD::ROTR:
3267     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3268            "Unexpected custom legalisation");
3269     Results.push_back(customLegalizeToWOp(N, DAG));
3270     break;
3271   case ISD::SDIV:
3272   case ISD::UDIV:
3273   case ISD::UREM: {
3274     MVT VT = N->getSimpleValueType(0);
3275     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
3276            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
3277            "Unexpected custom legalisation");
3278     if (N->getOperand(0).getOpcode() == ISD::Constant ||
3279         N->getOperand(1).getOpcode() == ISD::Constant)
3280       return;
3281 
3282     // If the input is i32, use ANY_EXTEND since the W instructions don't read
3283     // the upper 32 bits. For other types we need to sign or zero extend
3284     // based on the opcode.
3285     unsigned ExtOpc = ISD::ANY_EXTEND;
3286     if (VT != MVT::i32)
3287       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
3288                                            : ISD::ZERO_EXTEND;
3289 
3290     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
3291     break;
3292   }
3293   case ISD::BITCAST: {
3294     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3295              Subtarget.hasStdExtF()) ||
3296             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
3297            "Unexpected custom legalisation");
3298     SDValue Op0 = N->getOperand(0);
3299     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
3300       if (Op0.getValueType() != MVT::f16)
3301         return;
3302       SDValue FPConv =
3303           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
3304       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
3305     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3306                Subtarget.hasStdExtF()) {
3307       if (Op0.getValueType() != MVT::f32)
3308         return;
3309       SDValue FPConv =
3310           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
3311       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
3312     }
3313     break;
3314   }
3315   case RISCVISD::GREVI:
3316   case RISCVISD::GORCI: {
3317     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3318            "Unexpected custom legalisation");
3319     // This is similar to customLegalizeToWOp, except that we pass the second
3320     // operand (a TargetConstant) straight through: it is already of type
3321     // XLenVT.
3322     SDLoc DL(N);
3323     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3324     SDValue NewOp0 =
3325         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3326     SDValue NewRes =
3327         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
3328     // ReplaceNodeResults requires we maintain the same type for the return
3329     // value.
3330     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
3331     break;
3332   }
3333   case RISCVISD::SHFLI: {
3334     // There is no SHFLIW instruction, but we can just promote the operation.
3335     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3336            "Unexpected custom legalisation");
3337     SDLoc DL(N);
3338     SDValue NewOp0 =
3339         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3340     SDValue NewRes =
3341         DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
3342     // ReplaceNodeResults requires we maintain the same type for the return
3343     // value.
3344     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
3345     break;
3346   }
3347   case ISD::BSWAP:
3348   case ISD::BITREVERSE: {
3349     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3350            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
3351     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3352                                  N->getOperand(0));
3353     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
3354     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
3355                                  DAG.getTargetConstant(Imm, DL,
3356                                                        Subtarget.getXLenVT()));
3357     // ReplaceNodeResults requires we maintain the same type for the return
3358     // value.
3359     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
3360     break;
3361   }
3362   case ISD::FSHL:
3363   case ISD::FSHR: {
3364     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3365            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
3366     SDValue NewOp0 =
3367         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3368     SDValue NewOp1 =
3369         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3370     SDValue NewOp2 =
3371         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
3372     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
3373     // Mask the shift amount to 5 bits.
3374     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
3375                          DAG.getConstant(0x1f, DL, MVT::i64));
3376     unsigned Opc =
3377         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
3378     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
3379     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
3380     break;
3381   }
3382   case ISD::EXTRACT_VECTOR_ELT: {
3383     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
3384     // type is illegal (currently only vXi64 RV32).
3385     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
3386     // transferred to the destination register. We issue two of these from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
3388     // first element.
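    // For example (illustrative): extracting element 2 of an nxv2i64 vector on
    // RV32 slides the vector down by 2, reads the low 32 bits with VMV_X_S,
    // shifts the element right by 32 and re-reads the low 32 bits for the
    // upper half, then recombines both halves with BUILD_PAIR.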
3389     SDLoc DL(N);
3390     SDValue Vec = N->getOperand(0);
3391     SDValue Idx = N->getOperand(1);
3392 
3393     // The vector type hasn't been legalized yet so we can't issue target
3394     // specific nodes if it needs legalization.
3395     // FIXME: We would manually legalize if it's important.
3396     if (!isTypeLegal(Vec.getValueType()))
3397       return;
3398 
3399     MVT VecVT = Vec.getSimpleValueType();
3400 
3401     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
3402            VecVT.getVectorElementType() == MVT::i64 &&
3403            "Unexpected EXTRACT_VECTOR_ELT legalization");
3404 
3405     // If this is a fixed vector, we need to convert it to a scalable vector.
3406     MVT ContainerVT = VecVT;
3407     if (VecVT.isFixedLengthVector()) {
3408       ContainerVT = getContainerForFixedLengthVector(VecVT);
3409       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3410     }
3411 
3412     MVT XLenVT = Subtarget.getXLenVT();
3413 
3414     // Use a VL of 1 to avoid processing more elements than we need.
    MVT MaskVT =
        MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3416     SDValue VL = DAG.getConstant(1, DL, XLenVT);
3417     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3418 
3419     // Unless the index is known to be 0, we must slide the vector down to get
3420     // the desired element into index 0.
3421     if (!isNullConstant(Idx)) {
3422       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3423                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3424     }
3425 
3426     // Extract the lower XLEN bits of the correct vector element.
3427     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3428 
3429     // To extract the upper XLEN bits of the vector element, shift the first
3430     // element right by 32 bits and re-extract the lower XLEN bits.
3431     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3432                                      DAG.getConstant(32, DL, XLenVT), VL);
3433     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
3434                                  ThirtyTwoV, Mask, VL);
3435 
3436     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
3437 
3438     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
3439     break;
3440   }
3441   case ISD::INTRINSIC_WO_CHAIN: {
3442     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3443     switch (IntNo) {
3444     default:
3445       llvm_unreachable(
3446           "Don't know how to custom type legalize this intrinsic!");
3447     case Intrinsic::riscv_vmv_x_s: {
3448       EVT VT = N->getValueType(0);
3449       assert((VT == MVT::i8 || VT == MVT::i16 ||
3450               (Subtarget.is64Bit() && VT == MVT::i32)) &&
3451              "Unexpected custom legalisation!");
3452       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
3453                                     Subtarget.getXLenVT(), N->getOperand(1));
3454       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
3455       break;
3456     }
3457     }
3458     break;
3459   }
3460   case ISD::VECREDUCE_ADD:
3461   case ISD::VECREDUCE_AND:
3462   case ISD::VECREDUCE_OR:
3463   case ISD::VECREDUCE_XOR:
3464   case ISD::VECREDUCE_SMAX:
3465   case ISD::VECREDUCE_UMAX:
3466   case ISD::VECREDUCE_SMIN:
3467   case ISD::VECREDUCE_UMIN:
3468     // The custom-lowering for these nodes returns a vector whose first element
3469     // is the result of the reduction. Extract its first element and let the
3470     // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
3471     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
3472       Results.push_back(V);
3473     break;
3474   }
3475 }
3476 
3477 // A structure to hold one of the bit-manipulation patterns below. Together, a
3478 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
3479 //   (or (and (shl x, 1), 0xAAAAAAAA),
3480 //       (and (srl x, 1), 0x55555555))
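// For example (illustrative), with a common source x and ShAmt == 1:
//   (and (shl x, 1), 0xAAAAAAAA) -> {Op = x, ShAmt = 1, IsSHL = true}
//   (and (srl x, 1), 0x55555555) -> {Op = x, ShAmt = 1, IsSHL = false}
// These two results satisfy formsPairWith() since only IsSHL differs.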
3481 struct RISCVBitmanipPat {
3482   SDValue Op;
3483   unsigned ShAmt;
3484   bool IsSHL;
3485 
3486   bool formsPairWith(const RISCVBitmanipPat &Other) const {
3487     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
3488   }
3489 };
3490 
3491 // Matches patterns of the form
3492 //   (and (shl x, C2), (C1 << C2))
3493 //   (and (srl x, C2), C1)
3494 //   (shl (and x, C1), C2)
3495 //   (srl (and x, (C1 << C2)), C2)
3496 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
3497 // The expected masks for each shift amount are specified in BitmanipMasks where
3498 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4, determined by
// whether BitmanipMasks contains 6 or 5 entries, assuming that the maximum
// possible XLen is 64.
3502 static Optional<RISCVBitmanipPat>
3503 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
3504   assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
3505          "Unexpected number of masks");
3506   Optional<uint64_t> Mask;
3507   // Optionally consume a mask around the shift operation.
3508   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
3509     Mask = Op.getConstantOperandVal(1);
3510     Op = Op.getOperand(0);
3511   }
3512   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
3513     return None;
3514   bool IsSHL = Op.getOpcode() == ISD::SHL;
3515 
3516   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3517     return None;
3518   uint64_t ShAmt = Op.getConstantOperandVal(1);
3519 
3520   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
3522     return None;
3523   // If we don't have enough masks for 64 bit, then we must be trying to
3524   // match SHFL so we're only allowed to shift 1/4 of the width.
3525   if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
3526     return None;
3527 
3528   SDValue Src = Op.getOperand(0);
3529 
3530   // The expected mask is shifted left when the AND is found around SHL
3531   // patterns.
3532   //   ((x >> 1) & 0x55555555)
3533   //   ((x << 1) & 0xAAAAAAAA)
3534   bool SHLExpMask = IsSHL;
3535 
3536   if (!Mask) {
3537     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
3538     // the mask is all ones: consume that now.
3539     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
3540       Mask = Src.getConstantOperandVal(1);
3541       Src = Src.getOperand(0);
3542       // The expected mask is now in fact shifted left for SRL, so reverse the
3543       // decision.
3544       //   ((x & 0xAAAAAAAA) >> 1)
3545       //   ((x & 0x55555555) << 1)
3546       SHLExpMask = !SHLExpMask;
3547     } else {
3548       // Use a default shifted mask of all-ones if there's no AND, truncated
3549       // down to the expected width. This simplifies the logic later on.
3550       Mask = maskTrailingOnes<uint64_t>(Width);
3551       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
3552     }
3553   }
3554 
3555   unsigned MaskIdx = Log2_32(ShAmt);
3556   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3557 
3558   if (SHLExpMask)
3559     ExpMask <<= ShAmt;
3560 
3561   if (Mask != ExpMask)
3562     return None;
3563 
3564   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
3565 }
3566 
3567 // Matches any of the following bit-manipulation patterns:
3568 //   (and (shl x, 1), (0x55555555 << 1))
3569 //   (and (srl x, 1), 0x55555555)
3570 //   (shl (and x, 0x55555555), 1)
3571 //   (srl (and x, (0x55555555 << 1)), 1)
3572 // where the shift amount and mask may vary thus:
3573 //   [1]  = 0x55555555 / 0xAAAAAAAA
3574 //   [2]  = 0x33333333 / 0xCCCCCCCC
3575 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
3576 //   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
3578 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
3579 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
3580   // These are the unshifted masks which we use to match bit-manipulation
3581   // patterns. They may be shifted left in certain circumstances.
3582   static const uint64_t BitmanipMasks[] = {
3583       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
3584       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
3585 
3586   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3587 }
3588 
3589 // Match the following pattern as a GREVI(W) operation
3590 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
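// For example (illustrative), the ShAmt == 1 pair
//   (or (and (shl x, 1), 0xAAAAAAAA), (and (srl x, 1), 0x55555555))
// swaps each pair of adjacent bits of x and is selected as (GREVI x, 1).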
3591 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
3592                                const RISCVSubtarget &Subtarget) {
3593   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
3594   EVT VT = Op.getValueType();
3595 
3596   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3597     auto LHS = matchGREVIPat(Op.getOperand(0));
3598     auto RHS = matchGREVIPat(Op.getOperand(1));
3599     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
3600       SDLoc DL(Op);
3601       return DAG.getNode(
3602           RISCVISD::GREVI, DL, VT, LHS->Op,
3603           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3604     }
3605   }
3606   return SDValue();
3607 }
3608 
// Matches any of the following patterns as a GORCI(W) operation
3610 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
3611 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
3612 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
3613 // Note that with the variant of 3.,
3614 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
3615 // the inner pattern will first be matched as GREVI and then the outer
3616 // pattern will be matched to GORC via the first rule above.
3617 // 4.  (or (rotl/rotr x, bitwidth/2), x)
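// For example (illustrative), (or (GREVI x, 1), x) ORs each even/odd bit pair
// of x together and is matched via rule 1 above as (GORCI x, 1).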
3618 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
3619                                const RISCVSubtarget &Subtarget) {
3620   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
3621   EVT VT = Op.getValueType();
3622 
3623   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
3624     SDLoc DL(Op);
3625     SDValue Op0 = Op.getOperand(0);
3626     SDValue Op1 = Op.getOperand(1);
3627 
3628     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
3629       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
3630           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
3631         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
3632       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
3633       if ((Reverse.getOpcode() == ISD::ROTL ||
3634            Reverse.getOpcode() == ISD::ROTR) &&
3635           Reverse.getOperand(0) == X &&
3636           isa<ConstantSDNode>(Reverse.getOperand(1))) {
3637         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
3638         if (RotAmt == (VT.getSizeInBits() / 2))
3639           return DAG.getNode(
3640               RISCVISD::GORCI, DL, VT, X,
3641               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
3642       }
3643       return SDValue();
3644     };
3645 
3646     // Check for either commutable permutation of (or (GREVI x, shamt), x)
3647     if (SDValue V = MatchOROfReverse(Op0, Op1))
3648       return V;
3649     if (SDValue V = MatchOROfReverse(Op1, Op0))
3650       return V;
3651 
3652     // OR is commutable so canonicalize its OR operand to the left
3653     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
3654       std::swap(Op0, Op1);
3655     if (Op0.getOpcode() != ISD::OR)
3656       return SDValue();
3657     SDValue OrOp0 = Op0.getOperand(0);
3658     SDValue OrOp1 = Op0.getOperand(1);
3659     auto LHS = matchGREVIPat(OrOp0);
3660     // OR is commutable so swap the operands and try again: x might have been
3661     // on the left
3662     if (!LHS) {
3663       std::swap(OrOp0, OrOp1);
3664       LHS = matchGREVIPat(OrOp0);
3665     }
3666     auto RHS = matchGREVIPat(Op1);
3667     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
3668       return DAG.getNode(
3669           RISCVISD::GORCI, DL, VT, LHS->Op,
3670           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
3671     }
3672   }
3673   return SDValue();
3674 }
3675 
3676 // Matches any of the following bit-manipulation patterns:
3677 //   (and (shl x, 1), (0x22222222 << 1))
3678 //   (and (srl x, 1), 0x22222222)
3679 //   (shl (and x, 0x22222222), 1)
3680 //   (srl (and x, (0x22222222 << 1)), 1)
3681 // where the shift amount and mask may vary thus:
3682 //   [1]  = 0x22222222 / 0x44444444
//   [2]  = 0x0C0C0C0C / 0x30303030
3684 //   [4]  = 0x00F000F0 / 0x0F000F00
3685 //   [8]  = 0x0000FF00 / 0x00FF0000
3686 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
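// For example (illustrative), a 32-bit SHFLI with ShAmt == 8 swaps the two
// middle bytes (bits 15:8 and 23:16) while leaving bits 7:0 and 31:24 in
// place, which is what the [8] mask pair above describes.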
3687 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
3688   // These are the unshifted masks which we use to match bit-manipulation
3689   // patterns. They may be shifted left in certain circumstances.
3690   static const uint64_t BitmanipMasks[] = {
3691       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
3692       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
3693 
3694   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3695 }
3696 
// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
3698 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
3699                                const RISCVSubtarget &Subtarget) {
3700   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
3701   EVT VT = Op.getValueType();
3702 
3703   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
3704     return SDValue();
3705 
3706   SDValue Op0 = Op.getOperand(0);
3707   SDValue Op1 = Op.getOperand(1);
3708 
3709   // Or is commutable so canonicalize the second OR to the LHS.
3710   if (Op0.getOpcode() != ISD::OR)
3711     std::swap(Op0, Op1);
3712   if (Op0.getOpcode() != ISD::OR)
3713     return SDValue();
3714 
3715   // We found an inner OR, so our operands are the operands of the inner OR
3716   // and the other operand of the outer OR.
3717   SDValue A = Op0.getOperand(0);
3718   SDValue B = Op0.getOperand(1);
3719   SDValue C = Op1;
3720 
3721   auto Match1 = matchSHFLPat(A);
3722   auto Match2 = matchSHFLPat(B);
3723 
3724   // If neither matched, we failed.
3725   if (!Match1 && !Match2)
3726     return SDValue();
3727 
  // We had at least one match. If one failed, try the remaining C operand.
3729   if (!Match1) {
3730     std::swap(A, C);
3731     Match1 = matchSHFLPat(A);
3732     if (!Match1)
3733       return SDValue();
3734   } else if (!Match2) {
3735     std::swap(B, C);
3736     Match2 = matchSHFLPat(B);
3737     if (!Match2)
3738       return SDValue();
3739   }
3740   assert(Match1 && Match2);
3741 
3742   // Make sure our matches pair up.
3743   if (!Match1->formsPairWith(*Match2))
3744     return SDValue();
3745 
  // All that remains is to make sure C is an AND with the same input, one that
  // masks out the bits that are being shuffled.
3748   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
3749       C.getOperand(0) != Match1->Op)
3750     return SDValue();
3751 
3752   uint64_t Mask = C.getConstantOperandVal(1);
3753 
3754   static const uint64_t BitmanipMasks[] = {
3755       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
3756       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
3757   };
3758 
3759   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
3760   unsigned MaskIdx = Log2_32(Match1->ShAmt);
3761   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3762 
3763   if (Mask != ExpMask)
3764     return SDValue();
3765 
3766   SDLoc DL(Op);
3767   return DAG.getNode(
3768       RISCVISD::SHFLI, DL, VT, Match1->Op,
3769       DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
3770 }
3771 
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is zero, since any repeated GREVI stage undoes
// itself. Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2); a repeated
// GORCI stage does not undo itself, but it is redundant.
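// For example (illustrative): (GREVI (GREVI x, 1), 2) folds to (GREVI x, 3),
// and (GREVI (GREVI x, 3), 3) folds all the way back to x.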
3776 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
3777   unsigned ShAmt1 = N->getConstantOperandVal(1);
3778   SDValue Src = N->getOperand(0);
3779 
3780   if (Src.getOpcode() != N->getOpcode())
3781     return SDValue();
3782 
3783   unsigned ShAmt2 = Src.getConstantOperandVal(1);
3784   Src = Src.getOperand(0);
3785 
3786   unsigned CombinedShAmt;
3787   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
3788     CombinedShAmt = ShAmt1 | ShAmt2;
3789   else
3790     CombinedShAmt = ShAmt1 ^ ShAmt2;
3791 
3792   if (CombinedShAmt == 0)
3793     return Src;
3794 
3795   SDLoc DL(N);
3796   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
3797                      DAG.getTargetConstant(CombinedShAmt, DL,
3798                                            N->getOperand(1).getValueType()));
3799 }
3800 
3801 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
3802                                                DAGCombinerInfo &DCI) const {
3803   SelectionDAG &DAG = DCI.DAG;
3804 
3805   switch (N->getOpcode()) {
3806   default:
3807     break;
3808   case RISCVISD::SplitF64: {
3809     SDValue Op0 = N->getOperand(0);
3810     // If the input to SplitF64 is just BuildPairF64 then the operation is
3811     // redundant. Instead, use BuildPairF64's operands directly.
3812     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
3813       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
3814 
3815     SDLoc DL(N);
3816 
3817     // It's cheaper to materialise two 32-bit integers than to load a double
3818     // from the constant pool and transfer it to integer registers through the
3819     // stack.
3820     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
3821       APInt V = C->getValueAPF().bitcastToAPInt();
3822       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
3823       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
3824       return DCI.CombineTo(N, Lo, Hi);
3825     }
3826 
3827     // This is a target-specific version of a DAGCombine performed in
3828     // DAGCombiner::visitBITCAST. It performs the equivalent of:
3829     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
3830     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
3831     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
3832         !Op0.getNode()->hasOneUse())
3833       break;
3834     SDValue NewSplitF64 =
3835         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
3836                     Op0.getOperand(0));
3837     SDValue Lo = NewSplitF64.getValue(0);
3838     SDValue Hi = NewSplitF64.getValue(1);
3839     APInt SignBit = APInt::getSignMask(32);
3840     if (Op0.getOpcode() == ISD::FNEG) {
3841       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
3842                                   DAG.getConstant(SignBit, DL, MVT::i32));
3843       return DCI.CombineTo(N, Lo, NewHi);
3844     }
3845     assert(Op0.getOpcode() == ISD::FABS);
3846     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
3847                                 DAG.getConstant(~SignBit, DL, MVT::i32));
3848     return DCI.CombineTo(N, Lo, NewHi);
3849   }
3850   case RISCVISD::SLLW:
3851   case RISCVISD::SRAW:
3852   case RISCVISD::SRLW:
3853   case RISCVISD::ROLW:
3854   case RISCVISD::RORW: {
3855     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
3856     SDValue LHS = N->getOperand(0);
3857     SDValue RHS = N->getOperand(1);
3858     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
3859     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
3860     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
3861         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
3862       if (N->getOpcode() != ISD::DELETED_NODE)
3863         DCI.AddToWorklist(N);
3864       return SDValue(N, 0);
3865     }
3866     break;
3867   }
3868   case RISCVISD::FSL:
3869   case RISCVISD::FSR: {
3870     // Only the lower log2(BitWidth)+1 bits of the shift amount are read.
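         // For example, with an i32 shift amount operand only bits [5:0] (a value
         // in [0, 63]) are demanded.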
3871     SDValue ShAmt = N->getOperand(2);
3872     unsigned BitWidth = ShAmt.getValueSizeInBits();
3873     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
3874     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
3875     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
3876       if (N->getOpcode() != ISD::DELETED_NODE)
3877         DCI.AddToWorklist(N);
3878       return SDValue(N, 0);
3879     }
3880     break;
3881   }
3882   case RISCVISD::FSLW:
3883   case RISCVISD::FSRW: {
3884     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
3885     // read.
3886     SDValue Op0 = N->getOperand(0);
3887     SDValue Op1 = N->getOperand(1);
3888     SDValue ShAmt = N->getOperand(2);
3889     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
3890     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
3891     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
3892         SimplifyDemandedBits(Op1, OpMask, DCI) ||
3893         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
3894       if (N->getOpcode() != ISD::DELETED_NODE)
3895         DCI.AddToWorklist(N);
3896       return SDValue(N, 0);
3897     }
3898     break;
3899   }
3900   case RISCVISD::GREVIW:
3901   case RISCVISD::GORCIW: {
3902     // Only the lower 32 bits of the first operand are read
3903     SDValue Op0 = N->getOperand(0);
3904     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
3905     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
3906       if (N->getOpcode() != ISD::DELETED_NODE)
3907         DCI.AddToWorklist(N);
3908       return SDValue(N, 0);
3909     }
3910 
3911     return combineGREVI_GORCI(N, DCI.DAG);
3912   }
3913   case RISCVISD::FMV_X_ANYEXTW_RV64: {
3914     SDLoc DL(N);
3915     SDValue Op0 = N->getOperand(0);
3916     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
3917     // conversion is unnecessary and can be replaced with an ANY_EXTEND
3918     // of the FMV_W_X_RV64 operand.
3919     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
3920       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
3921              "Unexpected value type!");
3922       return Op0.getOperand(0);
3923     }
3924 
3925     // This is a target-specific version of a DAGCombine performed in
3926     // DAGCombiner::visitBITCAST. It performs the equivalent of:
3927     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
3928     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
3929     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
3930         !Op0.getNode()->hasOneUse())
3931       break;
3932     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
3933                                  Op0.getOperand(0));
3934     APInt SignBit = APInt::getSignMask(32).sext(64);
3935     if (Op0.getOpcode() == ISD::FNEG)
3936       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
3937                          DAG.getConstant(SignBit, DL, MVT::i64));
3938 
3939     assert(Op0.getOpcode() == ISD::FABS);
3940     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
3941                        DAG.getConstant(~SignBit, DL, MVT::i64));
3942   }
3943   case RISCVISD::GREVI:
3944   case RISCVISD::GORCI:
3945     return combineGREVI_GORCI(N, DCI.DAG);
3946   case ISD::OR:
3947     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
3948       return GREV;
3949     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
3950       return GORC;
3951     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
3952       return SHFL;
3953     break;
3954   case RISCVISD::SELECT_CC: {
3955     // Try to fold this SELECT_CC into a simpler form using the patterns below.
3956     SDValue LHS = N->getOperand(0);
3957     SDValue RHS = N->getOperand(1);
3958     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
3959     if (!ISD::isIntEqualitySetCC(CCVal))
3960       break;
3961 
3962     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
3963     //      (select_cc X, Y, lt, trueV, falseV)
3964     // Sometimes the setcc is introduced after select_cc has been formed.
3965     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
3966         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
3967       // If we're looking for eq 0 instead of ne 0, we need to invert the
3968       // condition.
3969       bool Invert = CCVal == ISD::SETEQ;
3970       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
3971       if (Invert)
3972         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
3973 
3974       RHS = LHS.getOperand(1);
3975       LHS = LHS.getOperand(0);
3976       normaliseSetCC(LHS, RHS, CCVal);
3977 
3978       SDLoc DL(N);
3979       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
3980       return DAG.getNode(
3981           RISCVISD::SELECT_CC, DL, N->getValueType(0),
3982           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
3983     }
3984 
3985     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
3986     //      (select_cc X, Y, eq/ne, trueV, falseV)
3987     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
3988       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
3989                          {LHS.getOperand(0), LHS.getOperand(1),
3990                           N->getOperand(2), N->getOperand(3),
3991                           N->getOperand(4)});
3992     // (select_cc X, 1, setne, trueV, falseV) ->
3993     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
3994     // This can occur when legalizing some floating point comparisons.
3995     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
3996     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
3997       SDLoc DL(N);
3998       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
3999       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
4000       RHS = DAG.getConstant(0, DL, LHS.getValueType());
4001       return DAG.getNode(
4002           RISCVISD::SELECT_CC, DL, N->getValueType(0),
4003           {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
4004     }
4005 
4006     break;
4007   }
4008   case ISD::SETCC: {
4009     // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
4010     // Comparing with 0 may allow us to fold into bnez/beqz.
4011     SDValue LHS = N->getOperand(0);
4012     SDValue RHS = N->getOperand(1);
4013     if (LHS.getValueType().isScalableVector())
4014       break;
4015     auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
4016     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
4017     if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
4018         DAG.MaskedValueIsZero(LHS, Mask)) {
4019       SDLoc DL(N);
4020       SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
4021       CC = ISD::getSetCCInverse(CC, LHS.getValueType());
4022       return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
4023     }
4024     break;
4025   }
4026   case ISD::FCOPYSIGN: {
4027     EVT VT = N->getValueType(0);
4028     if (!VT.isVector())
4029       break;
4030     // There is a form of VFSGNJ which injects the negated sign of its second
4031     // operand. Try to bubble any FNEG up past the extend/round to produce
4032     // this optimized pattern. Avoid modifying cases where the FP_ROUND has
4033     // TRUNC=1.
4034     SDValue In2 = N->getOperand(1);
4035     // Avoid cases where the extend/round has multiple uses, as duplicating
4036     // those is typically more expensive than removing a fneg.
4037     if (!In2.hasOneUse())
4038       break;
4039     if (In2.getOpcode() != ISD::FP_EXTEND &&
4040         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
4041       break;
4042     In2 = In2.getOperand(0);
4043     if (In2.getOpcode() != ISD::FNEG)
4044       break;
4045     SDLoc DL(N);
4046     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
4047     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
4048                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
4049   }
4050   }
4051 
4052   return SDValue();
4053 }
4054 
4055 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
4056     const SDNode *N, CombineLevel Level) const {
4057   // The following folds are only desirable if `(OP _, c1 << c2)` can be
4058   // materialised in fewer instructions than `(OP _, c1)`:
4059   //
4060   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4061   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
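       //
       // For example, (shl (add x, 1), 4) is combined because 1 << 4 == 16 still
       // fits an ADDI immediate, while (shl (add x, 2047), 4) is not, because
       // 2047 fits an ADDI immediate but 2047 << 4 does not.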
4062   SDValue N0 = N->getOperand(0);
4063   EVT Ty = N0.getValueType();
4064   if (Ty.isScalarInteger() &&
4065       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
4066     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4067     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
4068     if (C1 && C2) {
4069       const APInt &C1Int = C1->getAPIntValue();
4070       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
4071 
4072       // We can materialise `c1 << c2` into an add immediate, so it's "free",
4073       // and the combine should happen, to potentially allow further combines
4074       // later.
4075       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
4076           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
4077         return true;
4078 
4079       // We can materialise `c1` in an add immediate, so it's "free", and the
4080       // combine should be prevented.
4081       if (C1Int.getMinSignedBits() <= 64 &&
4082           isLegalAddImmediate(C1Int.getSExtValue()))
4083         return false;
4084 
4085       // Neither constant will fit into an immediate, so find materialisation
4086       // costs.
4087       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
4088                                               Subtarget.is64Bit());
4089       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
4090           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
4091 
4092       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
4093       // combine should be prevented.
4094       if (C1Cost < ShiftedC1Cost)
4095         return false;
4096     }
4097   }
4098   return true;
4099 }
4100 
4101 bool RISCVTargetLowering::targetShrinkDemandedConstant(
4102     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4103     TargetLoweringOpt &TLO) const {
4104   // Delay this optimization as late as possible.
4105   if (!TLO.LegalOps)
4106     return false;
4107 
4108   EVT VT = Op.getValueType();
4109   if (VT.isVector())
4110     return false;
4111 
4112   // Only handle AND for now.
4113   if (Op.getOpcode() != ISD::AND)
4114     return false;
4115 
4116   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4117   if (!C)
4118     return false;
4119 
4120   const APInt &Mask = C->getAPIntValue();
4121 
4122   // Clear all non-demanded bits initially.
4123   APInt ShrunkMask = Mask & DemandedBits;
4124 
4125   // If the shrunk mask fits in sign extended 12 bits, let the target
4126   // independent code apply it.
4127   if (ShrunkMask.isSignedIntN(12))
4128     return false;
4129 
4130   // Try to make a smaller immediate by setting undemanded bits.
4131 
4132   // We need to be able to make a negative number through a combination of mask
4133   // and undemanded bits.
4134   APInt ExpandedMask = Mask | ~DemandedBits;
4135   if (!ExpandedMask.isNegative())
4136     return false;
4137 
4138   // The minimum number of bits needed to represent the negative number.
4139   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
4140 
4141   // Try to make a 12 bit negative immediate. If that fails try to make a 32
4142   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
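       // For example (illustrative values): with Mask == 0xff0 and only the low
       // 12 bits demanded, the expanded mask is negative and needs only 5 signed
       // bits, so the constant can be widened to -16 (0x...fff0), which fits in
       // an ANDI immediate.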
4143   APInt NewMask = ShrunkMask;
4144   if (MinSignedBits <= 12)
4145     NewMask.setBitsFrom(11);
4146   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
4147     NewMask.setBitsFrom(31);
4148   else
4149     return false;
4150 
4151   // Sanity check that our new mask is a subset of the expanded mask.
4152   assert(NewMask.isSubsetOf(ExpandedMask));
4153 
4154   // If we aren't changing the mask, just return true to keep it and prevent
4155   // the caller from optimizing.
4156   if (NewMask == Mask)
4157     return true;
4158 
4159   // Replace the constant with the new mask.
4160   SDLoc DL(Op);
4161   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
4162   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
4163   return TLO.CombineTo(Op, NewOp);
4164 }
4165 
4166 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
4167                                                         KnownBits &Known,
4168                                                         const APInt &DemandedElts,
4169                                                         const SelectionDAG &DAG,
4170                                                         unsigned Depth) const {
4171   unsigned BitWidth = Known.getBitWidth();
4172   unsigned Opc = Op.getOpcode();
4173   assert((Opc >= ISD::BUILTIN_OP_END ||
4174           Opc == ISD::INTRINSIC_WO_CHAIN ||
4175           Opc == ISD::INTRINSIC_W_CHAIN ||
4176           Opc == ISD::INTRINSIC_VOID) &&
4177          "Should use MaskedValueIsZero if you don't know whether Op"
4178          " is a target node!");
4179 
4180   Known.resetAll();
4181   switch (Opc) {
4182   default: break;
4183   case RISCVISD::REMUW: {
4184     KnownBits Known2;
4185     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4186     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
4187     // We only care about the lower 32 bits.
4188     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
4189     // Restore the original width by sign extending.
4190     Known = Known.sext(BitWidth);
4191     break;
4192   }
4193   case RISCVISD::DIVUW: {
4194     KnownBits Known2;
4195     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4196     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
4197     // We only care about the lower 32 bits.
4198     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
4199     // Restore the original width by sign extending.
4200     Known = Known.sext(BitWidth);
4201     break;
4202   }
4203   case RISCVISD::READ_VLENB:
4204     // We assume VLENB is at least 8 bytes.
4205     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
4206     Known.Zero.setLowBits(3);
4207     break;
4208   }
4209 }
4210 
4211 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
4212     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4213     unsigned Depth) const {
4214   switch (Op.getOpcode()) {
4215   default:
4216     break;
4217   case RISCVISD::SLLW:
4218   case RISCVISD::SRAW:
4219   case RISCVISD::SRLW:
4220   case RISCVISD::DIVW:
4221   case RISCVISD::DIVUW:
4222   case RISCVISD::REMUW:
4223   case RISCVISD::ROLW:
4224   case RISCVISD::RORW:
4225   case RISCVISD::GREVIW:
4226   case RISCVISD::GORCIW:
4227   case RISCVISD::FSLW:
4228   case RISCVISD::FSRW:
4229     // TODO: As the result is sign-extended, this is conservatively correct. A
4230     // more precise answer could be calculated for SRAW depending on known
4231     // bits in the shift amount.
4232     return 33;
4233   case RISCVISD::SHFLI: {
4234     // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
4235     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
4236     // will stay within the upper 32 bits. If there were more than 32 sign bits
4237     // before there will be at least 33 sign bits after.
4238     if (Op.getValueType() == MVT::i64 &&
4239         (Op.getConstantOperandVal(1) & 0x10) == 0) {
4240       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
4241       if (Tmp > 32)
4242         return 33;
4243     }
4244     break;
4245   }
4246   case RISCVISD::VMV_X_S:
4247     // The number of sign bits of the scalar result is computed by obtaining the
4248     // element type of the input vector operand, subtracting its width from the
4249     // XLEN, and then adding one (sign bit within the element type). If the
4250     // element type is wider than XLen, the least-significant XLEN bits are
4251     // taken.
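         // For example, an input vector with i8 elements on RV64 gives
         // 64 - 8 + 1 == 57 known sign bits.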
4252     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
4253       return 1;
4254     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
4255   }
4256 
4257   return 1;
4258 }
4259 
4260 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
4261                                                   MachineBasicBlock *BB) {
4262   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
4263 
4264   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
4265   // Should the count have wrapped while it was being read, we need to try
4266   // again.
4267   // ...
4268   // read:
4269   // rdcycleh x3 # load high word of cycle
4270   // rdcycle  x2 # load low word of cycle
4271   // rdcycleh x4 # load high word of cycle
4272   // bne x3, x4, read # check if high word reads match, otherwise try again
4273   // ...
4274 
4275   MachineFunction &MF = *BB->getParent();
4276   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4277   MachineFunction::iterator It = ++BB->getIterator();
4278 
4279   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
4280   MF.insert(It, LoopMBB);
4281 
4282   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
4283   MF.insert(It, DoneMBB);
4284 
4285   // Transfer the remainder of BB and its successor edges to DoneMBB.
4286   DoneMBB->splice(DoneMBB->begin(), BB,
4287                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
4288   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
4289 
4290   BB->addSuccessor(LoopMBB);
4291 
4292   MachineRegisterInfo &RegInfo = MF.getRegInfo();
4293   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
4294   Register LoReg = MI.getOperand(0).getReg();
4295   Register HiReg = MI.getOperand(1).getReg();
4296   DebugLoc DL = MI.getDebugLoc();
4297 
4298   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4299   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
4300       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
4301       .addReg(RISCV::X0);
4302   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
4303       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
4304       .addReg(RISCV::X0);
4305   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
4306       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
4307       .addReg(RISCV::X0);
4308 
4309   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
4310       .addReg(HiReg)
4311       .addReg(ReadAgainReg)
4312       .addMBB(LoopMBB);
4313 
4314   LoopMBB->addSuccessor(LoopMBB);
4315   LoopMBB->addSuccessor(DoneMBB);
4316 
4317   MI.eraseFromParent();
4318 
4319   return DoneMBB;
4320 }
4321 
4322 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
4323                                              MachineBasicBlock *BB) {
4324   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
4325 
4326   MachineFunction &MF = *BB->getParent();
4327   DebugLoc DL = MI.getDebugLoc();
4328   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
4329   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4330   Register LoReg = MI.getOperand(0).getReg();
4331   Register HiReg = MI.getOperand(1).getReg();
4332   Register SrcReg = MI.getOperand(2).getReg();
4333   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
4334   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
4335 
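       // RV32D has no instruction for moving an FPR64 directly into a pair of
       // GPRs, so spill the source to a stack slot and reload it as two 32-bit
       // word loads.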
4336   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
4337                           RI);
4338   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
4339   MachineMemOperand *MMOLo =
4340       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
4341   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
4342       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
4343   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
4344       .addFrameIndex(FI)
4345       .addImm(0)
4346       .addMemOperand(MMOLo);
4347   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
4348       .addFrameIndex(FI)
4349       .addImm(4)
4350       .addMemOperand(MMOHi);
4351   MI.eraseFromParent(); // The pseudo instruction is gone now.
4352   return BB;
4353 }
4354 
4355 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
4356                                                  MachineBasicBlock *BB) {
4357   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
4358          "Unexpected instruction");
4359 
4360   MachineFunction &MF = *BB->getParent();
4361   DebugLoc DL = MI.getDebugLoc();
4362   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
4363   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4364   Register DstReg = MI.getOperand(0).getReg();
4365   Register LoReg = MI.getOperand(1).getReg();
4366   Register HiReg = MI.getOperand(2).getReg();
4367   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
4368   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
4369 
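       // Conversely, build the FPR64 value by storing the two GPR halves to a
       // stack slot and reloading the slot as a single f64.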
4370   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
4371   MachineMemOperand *MMOLo =
4372       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
4373   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
4374       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
4375   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
4376       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
4377       .addFrameIndex(FI)
4378       .addImm(0)
4379       .addMemOperand(MMOLo);
4380   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
4381       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
4382       .addFrameIndex(FI)
4383       .addImm(4)
4384       .addMemOperand(MMOHi);
4385   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
4386   MI.eraseFromParent(); // The pseudo instruction is gone now.
4387   return BB;
4388 }
4389 
4390 static bool isSelectPseudo(MachineInstr &MI) {
4391   switch (MI.getOpcode()) {
4392   default:
4393     return false;
4394   case RISCV::Select_GPR_Using_CC_GPR:
4395   case RISCV::Select_FPR16_Using_CC_GPR:
4396   case RISCV::Select_FPR32_Using_CC_GPR:
4397   case RISCV::Select_FPR64_Using_CC_GPR:
4398     return true;
4399   }
4400 }
4401 
4402 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
4403                                            MachineBasicBlock *BB) {
4404   // To "insert" Select_* instructions, we actually have to insert the triangle
4405   // control-flow pattern.  The incoming instructions know the destination vreg
4406   // to set, the condition code register to branch on, the true/false values to
4407   // select between, and the condcode to use to select the appropriate branch.
4408   //
4409   // We produce the following control flow:
4410   //     HeadMBB
4411   //     |  \
4412   //     |  IfFalseMBB
4413   //     | /
4414   //    TailMBB
4415   //
4416   // When we find a sequence of selects we attempt to optimize their emission
4417   // by sharing the control flow. Currently we only handle cases where we have
4418   // multiple selects with the exact same condition (same LHS, RHS and CC).
4419   // The selects may be interleaved with other instructions if the other
4420   // instructions meet some requirements we deem safe:
4421   // - They are debug instructions. Otherwise,
4422   // - They do not have side-effects, do not access memory and their inputs do
4423   //   not depend on the results of the select pseudo-instructions.
4424   // The TrueV/FalseV operands of the selects cannot depend on the result of
4425   // previous selects in the sequence.
4426   // These conditions could be further relaxed. See the X86 target for a
4427   // related approach and more information.
4428   Register LHS = MI.getOperand(1).getReg();
4429   Register RHS = MI.getOperand(2).getReg();
4430   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
4431 
4432   SmallVector<MachineInstr *, 4> SelectDebugValues;
4433   SmallSet<Register, 4> SelectDests;
4434   SelectDests.insert(MI.getOperand(0).getReg());
4435 
4436   MachineInstr *LastSelectPseudo = &MI;
4437 
4438   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
4439        SequenceMBBI != E; ++SequenceMBBI) {
4440     if (SequenceMBBI->isDebugInstr())
4441       continue;
4442     else if (isSelectPseudo(*SequenceMBBI)) {
4443       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
4444           SequenceMBBI->getOperand(2).getReg() != RHS ||
4445           SequenceMBBI->getOperand(3).getImm() != CC ||
4446           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
4447           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
4448         break;
4449       LastSelectPseudo = &*SequenceMBBI;
4450       SequenceMBBI->collectDebugValues(SelectDebugValues);
4451       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
4452     } else {
4453       if (SequenceMBBI->hasUnmodeledSideEffects() ||
4454           SequenceMBBI->mayLoadOrStore())
4455         break;
4456       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
4457             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
4458           }))
4459         break;
4460     }
4461   }
4462 
4463   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
4464   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4465   DebugLoc DL = MI.getDebugLoc();
4466   MachineFunction::iterator I = ++BB->getIterator();
4467 
4468   MachineBasicBlock *HeadMBB = BB;
4469   MachineFunction *F = BB->getParent();
4470   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
4471   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
4472 
4473   F->insert(I, IfFalseMBB);
4474   F->insert(I, TailMBB);
4475 
4476   // Transfer debug instructions associated with the selects to TailMBB.
4477   for (MachineInstr *DebugInstr : SelectDebugValues) {
4478     TailMBB->push_back(DebugInstr->removeFromParent());
4479   }
4480 
4481   // Move all instructions after the sequence to TailMBB.
4482   TailMBB->splice(TailMBB->end(), HeadMBB,
4483                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
4484   // Update machine-CFG edges by transferring all successors of the current
4485   // block to the new block which will contain the Phi nodes for the selects.
4486   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
4487   // Set the successors for HeadMBB.
4488   HeadMBB->addSuccessor(IfFalseMBB);
4489   HeadMBB->addSuccessor(TailMBB);
4490 
4491   // Insert appropriate branch.
4492   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
4493 
4494   BuildMI(HeadMBB, DL, TII.get(Opcode))
4495     .addReg(LHS)
4496     .addReg(RHS)
4497     .addMBB(TailMBB);
4498 
4499   // IfFalseMBB just falls through to TailMBB.
4500   IfFalseMBB->addSuccessor(TailMBB);
4501 
4502   // Create PHIs for all of the select pseudo-instructions.
4503   auto SelectMBBI = MI.getIterator();
4504   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
4505   auto InsertionPoint = TailMBB->begin();
4506   while (SelectMBBI != SelectEnd) {
4507     auto Next = std::next(SelectMBBI);
4508     if (isSelectPseudo(*SelectMBBI)) {
4509       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
4510       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
4511               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
4512           .addReg(SelectMBBI->getOperand(4).getReg())
4513           .addMBB(HeadMBB)
4514           .addReg(SelectMBBI->getOperand(5).getReg())
4515           .addMBB(IfFalseMBB);
4516       SelectMBBI->eraseFromParent();
4517     }
4518     SelectMBBI = Next;
4519   }
4520 
4521   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
4522   return TailMBB;
4523 }
4524 
4525 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
4526                                     int VLIndex, unsigned SEWIndex,
4527                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
4528   MachineFunction &MF = *BB->getParent();
4529   DebugLoc DL = MI.getDebugLoc();
4530   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
4531 
4532   unsigned SEW = MI.getOperand(SEWIndex).getImm();
4533   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
4534   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
4535 
4536   MachineRegisterInfo &MRI = MF.getRegInfo();
4537 
4538   auto BuildVSETVLI = [&]() {
4539     if (VLIndex >= 0) {
4540       Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
4541       Register VLReg = MI.getOperand(VLIndex).getReg();
4542 
4543       // VL might be a compile time constant, but isel would have to put it
4544       // in a register. See if VL comes from an ADDI X0, imm.
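             // For example, if VL was materialised by "ADDI rd, x0, 4", emitting
             // PseudoVSETIVLI with immediate 4 avoids keeping a GPR live just to
             // hold the small constant.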
4545       if (VLReg.isVirtual()) {
4546         MachineInstr *Def = MRI.getVRegDef(VLReg);
4547         if (Def && Def->getOpcode() == RISCV::ADDI &&
4548             Def->getOperand(1).getReg() == RISCV::X0 &&
4549             Def->getOperand(2).isImm()) {
4550           uint64_t Imm = Def->getOperand(2).getImm();
4551           // VSETIVLI allows a 5-bit zero extended immediate.
4552           if (isUInt<5>(Imm))
4553             return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
4554                 .addReg(DestReg, RegState::Define | RegState::Dead)
4555                 .addImm(Imm);
4556         }
4557       }
4558 
4559       return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
4560           .addReg(DestReg, RegState::Define | RegState::Dead)
4561           .addReg(VLReg);
4562     }
4563 
4564     // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
4565     return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
4566         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
4567         .addReg(RISCV::X0, RegState::Kill);
4568   };
4569 
4570   MachineInstrBuilder MIB = BuildVSETVLI();
4571 
4572   // Default to tail agnostic unless the destination is tied to a source. In
4573   // that case the user would have some control over the tail values. The tail
4574   // policy is also ignored on instructions that only update element 0 like
4575   // vmv.s.x or reductions so use agnostic there to match the common case.
4576   // FIXME: This is conservatively correct, but we might want to detect that
4577   // the input is undefined.
4578   bool TailAgnostic = true;
4579   unsigned UseOpIdx;
4580   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
4581     TailAgnostic = false;
4582     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
4583     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
4584     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
4585     if (UseMI && UseMI->isImplicitDef())
4586       TailAgnostic = true;
4587   }
4588 
4589   // For simplicity we reuse the vtype representation here.
4590   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
4591                                      /*TailAgnostic*/ TailAgnostic,
4592                                      /*MaskAgnostic*/ false));
4593 
4594   // Remove (now) redundant operands from pseudo
4595   if (VLIndex >= 0) {
4596     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
4597     MI.getOperand(VLIndex).setIsKill(false);
4598   }
4599 
4600   return BB;
4601 }
4602 
4603 MachineBasicBlock *
4604 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
4605                                                  MachineBasicBlock *BB) const {
4606   uint64_t TSFlags = MI.getDesc().TSFlags;
4607 
4608   if (TSFlags & RISCVII::HasSEWOpMask) {
4609     unsigned NumOperands = MI.getNumExplicitOperands();
4610     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
4611     unsigned SEWIndex = NumOperands - 1;
4612     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
4613 
4614     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
4615                                                RISCVII::VLMulShift);
4616     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
4617   }
4618 
4619   switch (MI.getOpcode()) {
4620   default:
4621     llvm_unreachable("Unexpected instr type to insert");
4622   case RISCV::ReadCycleWide:
4623     assert(!Subtarget.is64Bit() &&
4624            "ReadCycleWide is only to be used on riscv32");
4625     return emitReadCycleWidePseudo(MI, BB);
4626   case RISCV::Select_GPR_Using_CC_GPR:
4627   case RISCV::Select_FPR16_Using_CC_GPR:
4628   case RISCV::Select_FPR32_Using_CC_GPR:
4629   case RISCV::Select_FPR64_Using_CC_GPR:
4630     return emitSelectPseudo(MI, BB);
4631   case RISCV::BuildPairF64Pseudo:
4632     return emitBuildPairF64Pseudo(MI, BB);
4633   case RISCV::SplitF64Pseudo:
4634     return emitSplitF64Pseudo(MI, BB);
4635   }
4636 }
4637 
4638 // Calling Convention Implementation.
4639 // The expectations for frontend ABI lowering vary from target to target.
4640 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
4641 // details, but this is a longer term goal. For now, we simply try to keep the
4642 // role of the frontend as simple and well-defined as possible. The rules can
4643 // be summarised as:
4644 // * Never split up large scalar arguments. We handle them here.
4645 // * If a hardfloat calling convention is being used, and the struct may be
4646 // passed in a pair of registers (fp+fp, int+fp), and both registers are
4647 // available, then pass as two separate arguments. If either the GPRs or FPRs
4648 // are exhausted, then pass according to the rule below.
4649 // * If a struct could never be passed in registers or directly in a stack
4650 // slot (as it is larger than 2*XLEN and the floating point rules don't
4651 // apply), then pass it using a pointer with the byval attribute.
4652 // * If a struct is less than 2*XLEN, then coerce to either a two-element
4653 // word-sized array or a 2*XLEN scalar (depending on alignment).
4654 // * The frontend can determine whether a struct is returned by reference or
4655 // not based on its size and fields. If it will be returned by reference, the
4656 // frontend must modify the prototype so a pointer with the sret annotation is
4657 // passed as the first argument. This is not necessary for large scalar
4658 // returns.
4659 // * Struct return values and varargs should be coerced to structs containing
4660 // register-size fields in the same situations they would be for fixed
4661 // arguments.
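     // For example (illustrative), under a hard-float ABI a struct with one
     // double field and one int field may be passed as a separate FPR argument
     // plus a GPR argument while registers of both classes remain, and is passed
     // according to the rules below once either class is exhausted.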
4662 
4663 static const MCPhysReg ArgGPRs[] = {
4664   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
4665   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
4666 };
4667 static const MCPhysReg ArgFPR16s[] = {
4668   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
4669   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
4670 };
4671 static const MCPhysReg ArgFPR32s[] = {
4672   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
4673   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
4674 };
4675 static const MCPhysReg ArgFPR64s[] = {
4676   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
4677   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
4678 };
4679 // This is an interim calling convention and it may be changed in the future.
4680 static const MCPhysReg ArgVRs[] = {
4681     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
4682     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
4683     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
4684 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
4685                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
4686                                      RISCV::V20M2, RISCV::V22M2};
4687 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
4688                                      RISCV::V20M4};
4689 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
4690 
4691 // Pass a 2*XLEN argument that has been split into two XLEN values through
4692 // registers or the stack as necessary.
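     // For example, on RV32 an i64 argument is legalised into two i32 halves,
     // which are assigned here either to GPRs or to the stack.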
4693 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
4694                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
4695                                 MVT ValVT2, MVT LocVT2,
4696                                 ISD::ArgFlagsTy ArgFlags2) {
4697   unsigned XLenInBytes = XLen / 8;
4698   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4699     // At least one half can be passed via register.
4700     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4701                                      VA1.getLocVT(), CCValAssign::Full));
4702   } else {
4703     // Both halves must be passed on the stack, with proper alignment.
4704     Align StackAlign =
4705         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4706     State.addLoc(
4707         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4708                             State.AllocateStack(XLenInBytes, StackAlign),
4709                             VA1.getLocVT(), CCValAssign::Full));
4710     State.addLoc(CCValAssign::getMem(
4711         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
4712         LocVT2, CCValAssign::Full));
4713     return false;
4714   }
4715 
4716   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4717     // The second half can also be passed via register.
4718     State.addLoc(
4719         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4720   } else {
4721     // The second half is passed via the stack, without additional alignment.
4722     State.addLoc(CCValAssign::getMem(
4723         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
4724         LocVT2, CCValAssign::Full));
4725   }
4726 
4727   return false;
4728 }
4729 
4730 // Implements the RISC-V calling convention. Returns true upon failure.
4731 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
4732                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
4733                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
4734                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
4735                      Optional<unsigned> FirstMaskArgument) {
4736   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
4737   assert(XLen == 32 || XLen == 64);
4738   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
4739 
4740   // Any return value split into more than two values can't be returned
4741   // directly.
4742   if (IsRet && ValNo > 1)
4743     return true;
4744 
4745   // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
4746   // passing a variadic argument, or if no F16/F32 argument registers are available.
4747   bool UseGPRForF16_F32 = true;
4748   // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
4749   // passing a variadic argument, or if no F64 argument registers are available.
4750   bool UseGPRForF64 = true;
4751 
4752   switch (ABI) {
4753   default:
4754     llvm_unreachable("Unexpected ABI");
4755   case RISCVABI::ABI_ILP32:
4756   case RISCVABI::ABI_LP64:
4757     break;
4758   case RISCVABI::ABI_ILP32F:
4759   case RISCVABI::ABI_LP64F:
4760     UseGPRForF16_F32 = !IsFixed;
4761     break;
4762   case RISCVABI::ABI_ILP32D:
4763   case RISCVABI::ABI_LP64D:
4764     UseGPRForF16_F32 = !IsFixed;
4765     UseGPRForF64 = !IsFixed;
4766     break;
4767   }
4768 
4769   // FPR16, FPR32, and FPR64 alias each other.
4770   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
4771     UseGPRForF16_F32 = true;
4772     UseGPRForF64 = true;
4773   }
4774 
4775   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
4776   // similar local variables rather than directly checking against the target
4777   // ABI.
4778 
4779   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
4780     LocVT = XLenVT;
4781     LocInfo = CCValAssign::BCvt;
4782   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
4783     LocVT = MVT::i64;
4784     LocInfo = CCValAssign::BCvt;
4785   }
4786 
4787   // If this is a variadic argument, the RISC-V calling convention requires
4788   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
4789   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
4790   // be used regardless of whether the original argument was split during
4791   // legalisation or not. The argument will not be passed by registers if the
4792   // original type is larger than 2*XLEN, so the register alignment rule does
4793   // not apply.
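       // For example, on RV32 a variadic double (8-byte size and alignment) must
       // go in an aligned GPR pair; if a0 is already taken, a1 is skipped and the
       // double is passed in a2/a3.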
4794   unsigned TwoXLenInBytes = (2 * XLen) / 8;
4795   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
4796       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
4797     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4798     // Skip 'odd' register if necessary.
4799     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
4800       State.AllocateReg(ArgGPRs);
4801   }
4802 
4803   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4804   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4805       State.getPendingArgFlags();
4806 
4807   assert(PendingLocs.size() == PendingArgFlags.size() &&
4808          "PendingLocs and PendingArgFlags out of sync");
4809 
4810   // Handle passing f64 on RV32D with a soft float ABI or when floating point
4811   // registers are exhausted.
4812   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
4813     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
4814            "Can't lower f64 if it is split");
4815     // Depending on available argument GPRs, f64 may be passed in a pair of
4816     // GPRs, split between a GPR and the stack, or passed completely on the
4817     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
4818     // cases.
4819     Register Reg = State.AllocateReg(ArgGPRs);
4820     LocVT = MVT::i32;
4821     if (!Reg) {
4822       unsigned StackOffset = State.AllocateStack(8, Align(8));
4823       State.addLoc(
4824           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4825       return false;
4826     }
4827     if (!State.AllocateReg(ArgGPRs))
4828       State.AllocateStack(4, Align(4));
4829     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4830     return false;
4831   }
4832 
4833   // Split arguments might be passed indirectly, so keep track of the pending
4834   // values.
4835   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
4836     LocVT = XLenVT;
4837     LocInfo = CCValAssign::Indirect;
4838     PendingLocs.push_back(
4839         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4840     PendingArgFlags.push_back(ArgFlags);
4841     if (!ArgFlags.isSplitEnd()) {
4842       return false;
4843     }
4844   }
4845 
4846   // If the split argument only had two elements, it should be passed directly
4847   // in registers or on the stack.
4848   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
4849     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4850     // Apply the normal calling convention rules to the first half of the
4851     // split argument.
4852     CCValAssign VA = PendingLocs[0];
4853     ISD::ArgFlagsTy AF = PendingArgFlags[0];
4854     PendingLocs.clear();
4855     PendingArgFlags.clear();
4856     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
4857                                ArgFlags);
4858   }
4859 
4860   // Allocate to a register if possible, or else a stack slot.
4861   Register Reg;
4862   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
4863     Reg = State.AllocateReg(ArgFPR16s);
4864   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
4865     Reg = State.AllocateReg(ArgFPR32s);
4866   else if (ValVT == MVT::f64 && !UseGPRForF64)
4867     Reg = State.AllocateReg(ArgFPR64s);
4868   else if (ValVT.isScalableVector()) {
4869     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
4870     if (RC == &RISCV::VRRegClass) {
4871       // Assign the first mask argument to V0.
4872       // This is an interim calling convention and it may be changed in the
4873       // future.
4874       if (FirstMaskArgument.hasValue() &&
4875           ValNo == FirstMaskArgument.getValue()) {
4876         Reg = State.AllocateReg(RISCV::V0);
4877       } else {
4878         Reg = State.AllocateReg(ArgVRs);
4879       }
4880     } else if (RC == &RISCV::VRM2RegClass) {
4881       Reg = State.AllocateReg(ArgVRM2s);
4882     } else if (RC == &RISCV::VRM4RegClass) {
4883       Reg = State.AllocateReg(ArgVRM4s);
4884     } else if (RC == &RISCV::VRM8RegClass) {
4885       Reg = State.AllocateReg(ArgVRM8s);
4886     } else {
4887       llvm_unreachable("Unhandled register class for ValueType");
4888     }
4889     if (!Reg) {
4890       LocInfo = CCValAssign::Indirect;
4891       // Try using a GPR to pass the address
4892       Reg = State.AllocateReg(ArgGPRs);
4893       LocVT = XLenVT;
4894     }
4895   } else
4896     Reg = State.AllocateReg(ArgGPRs);
4897   unsigned StackOffset =
4898       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
4899 
4900   // If we reach this point and PendingLocs is non-empty, we must be at the
4901   // end of a split argument that must be passed indirectly.
4902   if (!PendingLocs.empty()) {
4903     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4904     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4905 
4906     for (auto &It : PendingLocs) {
4907       if (Reg)
4908         It.convertToReg(Reg);
4909       else
4910         It.convertToMem(StackOffset);
4911       State.addLoc(It);
4912     }
4913     PendingLocs.clear();
4914     PendingArgFlags.clear();
4915     return false;
4916   }
4917 
4918   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
4919           (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
4920          "Expected an XLenVT or scalable vector types at this stage");
4921 
4922   if (Reg) {
4923     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4924     return false;
4925   }
4926 
4927   // When a floating-point value is passed on the stack, no bit-conversion is
4928   // needed.
4929   if (ValVT.isFloatingPoint()) {
4930     LocVT = ValVT;
4931     LocInfo = CCValAssign::Full;
4932   }
4933   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4934   return false;
4935 }
4936 
4937 template <typename ArgTy>
4938 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
4939   for (const auto &ArgIdx : enumerate(Args)) {
4940     MVT ArgVT = ArgIdx.value().VT;
4941     if (ArgVT.isScalableVector() &&
4942         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
4943       return ArgIdx.index();
4944   }
4945   return None;
4946 }
4947 
4948 void RISCVTargetLowering::analyzeInputArgs(
4949     MachineFunction &MF, CCState &CCInfo,
4950     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
4951   unsigned NumArgs = Ins.size();
4952   FunctionType *FType = MF.getFunction().getFunctionType();
4953 
4954   Optional<unsigned> FirstMaskArgument;
4955   if (Subtarget.hasStdExtV())
4956     FirstMaskArgument = preAssignMask(Ins);
4957 
4958   for (unsigned i = 0; i != NumArgs; ++i) {
4959     MVT ArgVT = Ins[i].VT;
4960     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
4961 
4962     Type *ArgTy = nullptr;
4963     if (IsRet)
4964       ArgTy = FType->getReturnType();
4965     else if (Ins[i].isOrigArg())
4966       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4967 
4968     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4969     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
4970                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
4971                  FirstMaskArgument)) {
4972       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
4973                         << EVT(ArgVT).getEVTString() << '\n');
4974       llvm_unreachable(nullptr);
4975     }
4976   }
4977 }
4978 
4979 void RISCVTargetLowering::analyzeOutputArgs(
4980     MachineFunction &MF, CCState &CCInfo,
4981     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4982     CallLoweringInfo *CLI) const {
4983   unsigned NumArgs = Outs.size();
4984 
4985   Optional<unsigned> FirstMaskArgument;
4986   if (Subtarget.hasStdExtV())
4987     FirstMaskArgument = preAssignMask(Outs);
4988 
4989   for (unsigned i = 0; i != NumArgs; i++) {
4990     MVT ArgVT = Outs[i].VT;
4991     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4992     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4993 
4994     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4995     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
4996                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
4997                  FirstMaskArgument)) {
4998       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
4999                         << EVT(ArgVT).getEVTString() << "\n");
5000       llvm_unreachable(nullptr);
5001     }
5002   }
5003 }
5004 
5005 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5006 // values.
5007 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5008                                    const CCValAssign &VA, const SDLoc &DL) {
5009   switch (VA.getLocInfo()) {
5010   default:
5011     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5012   case CCValAssign::Full:
5013     break;
5014   case CCValAssign::BCvt:
5015     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
5016       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
5017     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5018       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
5019     else
5020       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5021     break;
5022   }
5023   return Val;
5024 }
5025 
5026 // The caller is responsible for loading the full value if the argument is
5027 // passed with CCValAssign::Indirect.
5028 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5029                                 const CCValAssign &VA, const SDLoc &DL,
5030                                 const RISCVTargetLowering &TLI) {
5031   MachineFunction &MF = DAG.getMachineFunction();
5032   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5033   EVT LocVT = VA.getLocVT();
5034   SDValue Val;
5035   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5036   Register VReg = RegInfo.createVirtualRegister(RC);
5037   RegInfo.addLiveIn(VA.getLocReg(), VReg);
5038   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5039 
5040   if (VA.getLocInfo() == CCValAssign::Indirect)
5041     return Val;
5042 
5043   return convertLocVTToValVT(DAG, Val, VA, DL);
5044 }
5045 
5046 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5047                                    const CCValAssign &VA, const SDLoc &DL) {
5048   EVT LocVT = VA.getLocVT();
5049 
5050   switch (VA.getLocInfo()) {
5051   default:
5052     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5053   case CCValAssign::Full:
5054     break;
5055   case CCValAssign::BCvt:
5056     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
5057       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
5058     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5059       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
5060     else
5061       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5062     break;
5063   }
5064   return Val;
5065 }
5066 
5067 // The caller is responsible for loading the full value if the argument is
5068 // passed with CCValAssign::Indirect.
5069 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5070                                 const CCValAssign &VA, const SDLoc &DL) {
5071   MachineFunction &MF = DAG.getMachineFunction();
5072   MachineFrameInfo &MFI = MF.getFrameInfo();
5073   EVT LocVT = VA.getLocVT();
5074   EVT ValVT = VA.getValVT();
5075   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
5076   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
5077                                  VA.getLocMemOffset(), /*Immutable=*/true);
5078   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
5079   SDValue Val;
5080 
5081   ISD::LoadExtType ExtType;
5082   switch (VA.getLocInfo()) {
5083   default:
5084     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5085   case CCValAssign::Full:
5086   case CCValAssign::Indirect:
5087   case CCValAssign::BCvt:
5088     ExtType = ISD::NON_EXTLOAD;
5089     break;
5090   }
5091   Val = DAG.getExtLoad(
5092       ExtType, DL, LocVT, Chain, FIN,
5093       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5094   return Val;
5095 }
5096 
5097 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
5098                                        const CCValAssign &VA, const SDLoc &DL) {
5099   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
5100          "Unexpected VA");
5101   MachineFunction &MF = DAG.getMachineFunction();
5102   MachineFrameInfo &MFI = MF.getFrameInfo();
5103   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5104 
5105   if (VA.isMemLoc()) {
5106     // f64 is passed on the stack.
5107     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
5108     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5109     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
5110                        MachinePointerInfo::getFixedStack(MF, FI));
5111   }
5112 
5113   assert(VA.isRegLoc() && "Expected register VA assignment");
5114 
5115   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5116   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
5117   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
5118   SDValue Hi;
5119   if (VA.getLocReg() == RISCV::X17) {
5120     // Second half of f64 is passed on the stack.
5121     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
5122     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5123     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
5124                      MachinePointerInfo::getFixedStack(MF, FI));
5125   } else {
5126     // Second half of f64 is passed in another GPR.
5127     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5128     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
5129     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
5130   }
5131   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5132 }
5133 
// FastCC shows less than a 1% performance improvement on some particular
// benchmarks, but in theory it may benefit other cases.
5136 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
5137                             CCValAssign::LocInfo LocInfo,
5138                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
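  // Note on the register lists below: in addition to the usual argument
  // registers (a0-a7 / fa0-fa7), FastCC hands out caller-saved temporaries so
  // that more values can travel in registers before spilling to the stack;
  // t0 (X5) and t1 (X6) are deliberately skipped since the save/restore
  // libcalls may clobber them.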
5139 
5140   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5141     // X5 and X6 might be used for save-restore libcall.
5142     static const MCPhysReg GPRList[] = {
5143         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
5144         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
5145         RISCV::X29, RISCV::X30, RISCV::X31};
5146     if (unsigned Reg = State.AllocateReg(GPRList)) {
5147       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5148       return false;
5149     }
5150   }
5151 
5152   if (LocVT == MVT::f16) {
5153     static const MCPhysReg FPR16List[] = {
5154         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
5155         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
5156         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
5157         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
5158     if (unsigned Reg = State.AllocateReg(FPR16List)) {
5159       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5160       return false;
5161     }
5162   }
5163 
5164   if (LocVT == MVT::f32) {
5165     static const MCPhysReg FPR32List[] = {
5166         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
5167         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
5168         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
5169         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
5170     if (unsigned Reg = State.AllocateReg(FPR32List)) {
5171       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5172       return false;
5173     }
5174   }
5175 
5176   if (LocVT == MVT::f64) {
5177     static const MCPhysReg FPR64List[] = {
5178         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
5179         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
5180         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
5181         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
5182     if (unsigned Reg = State.AllocateReg(FPR64List)) {
5183       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5184       return false;
5185     }
5186   }
5187 
5188   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
5189     unsigned Offset4 = State.AllocateStack(4, Align(4));
5190     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
5191     return false;
5192   }
5193 
5194   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
5195     unsigned Offset5 = State.AllocateStack(8, Align(8));
5196     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
5197     return false;
5198   }
5199 
5200   return true; // CC didn't match.
5201 }
5202 
5203 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5204                          CCValAssign::LocInfo LocInfo,
5205                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
5206 
5207   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5208     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
5209     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
5210     static const MCPhysReg GPRList[] = {
5211         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
5212         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
5213     if (unsigned Reg = State.AllocateReg(GPRList)) {
5214       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5215       return false;
5216     }
5217   }
5218 
5219   if (LocVT == MVT::f32) {
5220     // Pass in STG registers: F1, ..., F6
5221     //                        fs0 ... fs5
5222     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
5223                                           RISCV::F18_F, RISCV::F19_F,
5224                                           RISCV::F20_F, RISCV::F21_F};
5225     if (unsigned Reg = State.AllocateReg(FPR32List)) {
5226       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5227       return false;
5228     }
5229   }
5230 
5231   if (LocVT == MVT::f64) {
5232     // Pass in STG registers: D1, ..., D6
5233     //                        fs6 ... fs11
5234     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
5235                                           RISCV::F24_D, RISCV::F25_D,
5236                                           RISCV::F26_D, RISCV::F27_D};
5237     if (unsigned Reg = State.AllocateReg(FPR64List)) {
5238       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5239       return false;
5240     }
5241   }
5242 
5243   report_fatal_error("No registers left in GHC calling convention");
5244   return true;
5245 }
5246 
5247 // Transform physical registers into virtual registers.
5248 SDValue RISCVTargetLowering::LowerFormalArguments(
5249     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5250     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5251     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5252 
5253   MachineFunction &MF = DAG.getMachineFunction();
5254 
5255   switch (CallConv) {
5256   default:
5257     report_fatal_error("Unsupported calling convention");
5258   case CallingConv::C:
5259   case CallingConv::Fast:
5260     break;
5261   case CallingConv::GHC:
5262     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
5263         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
5264       report_fatal_error(
5265         "GHC calling convention requires the F and D instruction set extensions");
5266   }
5267 
5268   const Function &Func = MF.getFunction();
5269   if (Func.hasFnAttribute("interrupt")) {
5270     if (!Func.arg_empty())
5271       report_fatal_error(
5272         "Functions with the interrupt attribute cannot have arguments!");
5273 
5274     StringRef Kind =
5275       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
5276 
5277     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
5278       report_fatal_error(
5279         "Function interrupt attribute argument not supported!");
5280   }
5281 
5282   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5283   MVT XLenVT = Subtarget.getXLenVT();
5284   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
5286   std::vector<SDValue> OutChains;
5287 
5288   // Assign locations to all of the incoming arguments.
5289   SmallVector<CCValAssign, 16> ArgLocs;
5290   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5291 
5292   if (CallConv == CallingConv::Fast)
5293     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
5294   else if (CallConv == CallingConv::GHC)
5295     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
5296   else
5297     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
5298 
5299   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5300     CCValAssign &VA = ArgLocs[i];
5301     SDValue ArgValue;
5302     // Passing f64 on RV32D with a soft float ABI must be handled as a special
5303     // case.
5304     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
5305       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
5306     else if (VA.isRegLoc())
5307       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
5308     else
5309       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5310 
5311     if (VA.getLocInfo() == CCValAssign::Indirect) {
5312       // If the original argument was split and passed by reference (e.g. i128
5313       // on RV32), we need to load all parts of it here (using the same
5314       // address).
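      // For example, an i128 argument on RV32 is split into four i32 parts
      // with PartOffsets 0, 4, 8 and 12; each part is loaded from the same
      // base pointer (ArgValue) plus its PartOffset.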
5315       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5316                                    MachinePointerInfo()));
5317       unsigned ArgIndex = Ins[i].OrigArgIndex;
5318       assert(Ins[i].PartOffset == 0);
5319       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5320         CCValAssign &PartVA = ArgLocs[i + 1];
5321         unsigned PartOffset = Ins[i + 1].PartOffset;
5322         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
5323                                       DAG.getIntPtrConstant(PartOffset, DL));
5324         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5325                                      MachinePointerInfo()));
5326         ++i;
5327       }
5328       continue;
5329     }
5330     InVals.push_back(ArgValue);
5331   }
5332 
5333   if (IsVarArg) {
5334     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
5335     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5336     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
5337     MachineFrameInfo &MFI = MF.getFrameInfo();
5338     MachineRegisterInfo &RegInfo = MF.getRegInfo();
5339     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
5340 
5341     // Offset of the first variable argument from stack pointer, and size of
5342     // the vararg save area. For now, the varargs save area is either zero or
5343     // large enough to hold a0-a7.
5344     int VaArgOffset, VarArgsSaveSize;
5345 
5346     // If all registers are allocated, then all varargs must be passed on the
5347     // stack and we don't need to save any argregs.
5348     if (ArgRegs.size() == Idx) {
5349       VaArgOffset = CCInfo.getNextStackOffset();
5350       VarArgsSaveSize = 0;
5351     } else {
5352       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
5353       VaArgOffset = -VarArgsSaveSize;
5354     }
5355 
    // Record the frame index of the first variable argument,
    // which is a value needed for VASTART.
5358     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
5359     RVFI->setVarArgsFrameIndex(FI);
5360 
    // If saving an odd number of registers, create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
5364     if (Idx % 2) {
5365       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
5366       VarArgsSaveSize += XLenInBytes;
5367     }
5368 
5369     // Copy the integer registers that may have been used for passing varargs
5370     // to the vararg save area.
5371     for (unsigned I = Idx; I < ArgRegs.size();
5372          ++I, VaArgOffset += XLenInBytes) {
5373       const Register Reg = RegInfo.createVirtualRegister(RC);
5374       RegInfo.addLiveIn(ArgRegs[I], Reg);
5375       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
5376       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
5377       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5378       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5379                                    MachinePointerInfo::getFixedStack(MF, FI));
5380       cast<StoreSDNode>(Store.getNode())
5381           ->getMemOperand()
5382           ->setValue((Value *)nullptr);
5383       OutChains.push_back(Store);
5384     }
5385     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
5386   }
5387 
  // All stores are grouped in one node to allow matching between the sizes of
  // Ins and InVals. This only happens for vararg functions.
5390   if (!OutChains.empty()) {
5391     OutChains.push_back(Chain);
5392     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5393   }
5394 
5395   return Chain;
5396 }
5397 
5398 /// isEligibleForTailCallOptimization - Check whether the call is eligible
5399 /// for tail call optimization.
5400 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
5401 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
5402     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5403     const SmallVector<CCValAssign, 16> &ArgLocs) const {
5404 
5405   auto &Callee = CLI.Callee;
5406   auto CalleeCC = CLI.CallConv;
5407   auto &Outs = CLI.Outs;
5408   auto &Caller = MF.getFunction();
5409   auto CallerCC = Caller.getCallingConv();
5410 
5411   // Exception-handling functions need a special set of instructions to
5412   // indicate a return to the hardware. Tail-calling another function would
5413   // probably break this.
5414   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
5415   // should be expanded as new function attributes are introduced.
5416   if (Caller.hasFnAttribute("interrupt"))
5417     return false;
5418 
5419   // Do not tail call opt if the stack is used to pass parameters.
5420   if (CCInfo.getNextStackOffset() != 0)
5421     return false;
5422 
  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly, i.e. the address of the value is passed in a register
  // (or on the stack if no register is available). Passing indirectly usually
  // requires allocating stack space to hold the value, so the
  // CCInfo.getNextStackOffset() != 0 check above is not sufficient; we must
  // also check whether any entry in ArgLocs uses CCValAssign::Indirect.
5431   for (auto &VA : ArgLocs)
5432     if (VA.getLocInfo() == CCValAssign::Indirect)
5433       return false;
5434 
5435   // Do not tail call opt if either caller or callee uses struct return
5436   // semantics.
5437   auto IsCallerStructRet = Caller.hasStructRetAttr();
5438   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5439   if (IsCallerStructRet || IsCalleeStructRet)
5440     return false;
5441 
5442   // Externally-defined functions with weak linkage should not be
5443   // tail-called. The behaviour of branch instructions in this situation (as
5444   // used for tail calls) is implementation-defined, so we cannot rely on the
5445   // linker replacing the tail call with a return.
5446   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5447     const GlobalValue *GV = G->getGlobal();
5448     if (GV->hasExternalWeakLinkage())
5449       return false;
5450   }
5451 
5452   // The callee has to preserve all registers the caller needs to preserve.
5453   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5454   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5455   if (CalleeCC != CallerCC) {
5456     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5457     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5458       return false;
5459   }
5460 
5461   // Byval parameters hand the function a pointer directly into the stack area
5462   // we want to reuse during a tail call. Working around this *is* possible
5463   // but less efficient and uglier in LowerCall.
5464   for (auto &Arg : Outs)
5465     if (Arg.Flags.isByVal())
5466       return false;
5467 
5468   return true;
5469 }
5470 
5471 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5472 // and output parameter nodes.
5473 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
5474                                        SmallVectorImpl<SDValue> &InVals) const {
5475   SelectionDAG &DAG = CLI.DAG;
5476   SDLoc &DL = CLI.DL;
5477   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5478   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5479   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5480   SDValue Chain = CLI.Chain;
5481   SDValue Callee = CLI.Callee;
5482   bool &IsTailCall = CLI.IsTailCall;
5483   CallingConv::ID CallConv = CLI.CallConv;
5484   bool IsVarArg = CLI.IsVarArg;
5485   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5486   MVT XLenVT = Subtarget.getXLenVT();
5487 
5488   MachineFunction &MF = DAG.getMachineFunction();
5489 
5490   // Analyze the operands of the call, assigning locations to each operand.
5491   SmallVector<CCValAssign, 16> ArgLocs;
5492   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5493 
5494   if (CallConv == CallingConv::Fast)
5495     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
5496   else if (CallConv == CallingConv::GHC)
5497     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
5498   else
5499     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
5500 
5501   // Check if it's really possible to do a tail call.
5502   if (IsTailCall)
5503     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5504 
5505   if (IsTailCall)
5506     ++NumTailCalls;
5507   else if (CLI.CB && CLI.CB->isMustTailCall())
5508     report_fatal_error("failed to perform tail call elimination on a call "
5509                        "site marked musttail");
5510 
5511   // Get a count of how many bytes are to be pushed on the stack.
5512   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
5513 
5514   // Create local copies for byval args
5515   SmallVector<SDValue, 8> ByValArgs;
5516   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5517     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5518     if (!Flags.isByVal())
5519       continue;
5520 
5521     SDValue Arg = OutVals[i];
5522     unsigned Size = Flags.getByValSize();
5523     Align Alignment = Flags.getNonZeroByValAlign();
5524 
5525     int FI =
5526         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5527     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5528     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
5529 
5530     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5531                           /*IsVolatile=*/false,
5532                           /*AlwaysInline=*/false, IsTailCall,
5533                           MachinePointerInfo(), MachinePointerInfo());
5534     ByValArgs.push_back(FIPtr);
5535   }
5536 
5537   if (!IsTailCall)
5538     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5539 
5540   // Copy argument values to their designated locations.
5541   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
5542   SmallVector<SDValue, 8> MemOpChains;
5543   SDValue StackPtr;
5544   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5545     CCValAssign &VA = ArgLocs[i];
5546     SDValue ArgValue = OutVals[i];
5547     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5548 
5549     // Handle passing f64 on RV32D with a soft float ABI as a special case.
5550     bool IsF64OnRV32DSoftABI =
5551         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
5552     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
5553       SDValue SplitF64 = DAG.getNode(
5554           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
5555       SDValue Lo = SplitF64.getValue(0);
5556       SDValue Hi = SplitF64.getValue(1);
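      // This mirrors unpackF64OnRV32DSoftABI on the receiving side: the low
      // half always goes in the assigned GPR, and the high half goes either
      // in the next GPR or, when the low half landed in a7, on the stack.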
5557 
5558       Register RegLo = VA.getLocReg();
5559       RegsToPass.push_back(std::make_pair(RegLo, Lo));
5560 
5561       if (RegLo == RISCV::X17) {
5562         // Second half of f64 is passed on the stack.
5563         // Work out the address of the stack slot.
5564         if (!StackPtr.getNode())
5565           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
5566         // Emit the store.
5567         MemOpChains.push_back(
5568             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
5569       } else {
5570         // Second half of f64 is passed in another GPR.
5571         assert(RegLo < RISCV::X31 && "Invalid register pair");
5572         Register RegHigh = RegLo + 1;
5573         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
5574       }
5575       continue;
5576     }
5577 
5578     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
5579     // as any other MemLoc.
5580 
5581     // Promote the value if needed.
5582     // For now, only handle fully promoted and indirect arguments.
5583     if (VA.getLocInfo() == CCValAssign::Indirect) {
5584       // Store the argument in a stack slot and pass its address.
5585       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
5586       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5587       MemOpChains.push_back(
5588           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5589                        MachinePointerInfo::getFixedStack(MF, FI)));
5590       // If the original argument was split (e.g. i128), we need
5591       // to store all parts of it here (and pass just one address).
5592       unsigned ArgIndex = Outs[i].OrigArgIndex;
5593       assert(Outs[i].PartOffset == 0);
5594       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5595         SDValue PartValue = OutVals[i + 1];
5596         unsigned PartOffset = Outs[i + 1].PartOffset;
5597         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
5598                                       DAG.getIntPtrConstant(PartOffset, DL));
5599         MemOpChains.push_back(
5600             DAG.getStore(Chain, DL, PartValue, Address,
5601                          MachinePointerInfo::getFixedStack(MF, FI)));
5602         ++i;
5603       }
5604       ArgValue = SpillSlot;
5605     } else {
5606       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5607     }
5608 
5609     // Use local copy if it is a byval arg.
5610     if (Flags.isByVal())
5611       ArgValue = ByValArgs[j++];
5612 
5613     if (VA.isRegLoc()) {
5614       // Queue up the argument copies and emit them at the end.
5615       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5616     } else {
5617       assert(VA.isMemLoc() && "Argument not register or memory");
5618       assert(!IsTailCall && "Tail call not allowed if stack is used "
5619                             "for passing parameters");
5620 
5621       // Work out the address of the stack slot.
5622       if (!StackPtr.getNode())
5623         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
5624       SDValue Address =
5625           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5626                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5627 
5628       // Emit the store.
5629       MemOpChains.push_back(
5630           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5631     }
5632   }
5633 
5634   // Join the stores, which are independent of one another.
5635   if (!MemOpChains.empty())
5636     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5637 
5638   SDValue Glue;
5639 
5640   // Build a sequence of copy-to-reg nodes, chained and glued together.
5641   for (auto &Reg : RegsToPass) {
5642     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5643     Glue = Chain.getValue(1);
5644   }
5645 
  // Validate that none of the argument registers have been marked as
  // reserved; if any have, report an error. Do the same for the return address
  // if this is not a tail call.
5649   validateCCReservedRegs(RegsToPass, MF);
5650   if (!IsTailCall &&
5651       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
5652     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5653         MF.getFunction(),
5654         "Return address register required, but has been reserved."});
5655 
5656   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5657   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5658   // split it and then direct call can be matched by PseudoCALL.
5659   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5660     const GlobalValue *GV = S->getGlobal();
5661 
5662     unsigned OpFlags = RISCVII::MO_CALL;
5663     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
5664       OpFlags = RISCVII::MO_PLT;
5665 
5666     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
5667   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5668     unsigned OpFlags = RISCVII::MO_CALL;
5669 
5670     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
5671                                                  nullptr))
5672       OpFlags = RISCVII::MO_PLT;
5673 
5674     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5675   }
5676 
5677   // The first call operand is the chain and the second is the target address.
5678   SmallVector<SDValue, 8> Ops;
5679   Ops.push_back(Chain);
5680   Ops.push_back(Callee);
5681 
5682   // Add argument registers to the end of the list so that they are
5683   // known live into the call.
5684   for (auto &Reg : RegsToPass)
5685     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5686 
5687   if (!IsTailCall) {
5688     // Add a register mask operand representing the call-preserved registers.
5689     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5690     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5691     assert(Mask && "Missing call preserved mask for calling convention");
5692     Ops.push_back(DAG.getRegisterMask(Mask));
5693   }
5694 
5695   // Glue the call to the argument copies, if any.
5696   if (Glue.getNode())
5697     Ops.push_back(Glue);
5698 
5699   // Emit the call.
5700   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5701 
5702   if (IsTailCall) {
5703     MF.getFrameInfo().setHasTailCall();
5704     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
5705   }
5706 
5707   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
5708   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5709   Glue = Chain.getValue(1);
5710 
5711   // Mark the end of the call, which is glued to the call itself.
5712   Chain = DAG.getCALLSEQ_END(Chain,
5713                              DAG.getConstant(NumBytes, DL, PtrVT, true),
5714                              DAG.getConstant(0, DL, PtrVT, true),
5715                              Glue, DL);
5716   Glue = Chain.getValue(1);
5717 
5718   // Assign locations to each value returned by this call.
5719   SmallVector<CCValAssign, 16> RVLocs;
5720   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5721   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
5722 
5723   // Copy all of the result registers out of their specified physreg.
5724   for (auto &VA : RVLocs) {
5725     // Copy the value out
5726     SDValue RetValue =
5727         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5728     // Glue the RetValue to the end of the call sequence
5729     Chain = RetValue.getValue(1);
5730     Glue = RetValue.getValue(2);
5731 
5732     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
5733       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
5734       SDValue RetValue2 =
5735           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
5736       Chain = RetValue2.getValue(1);
5737       Glue = RetValue2.getValue(2);
5738       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
5739                              RetValue2);
5740     }
5741 
5742     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5743 
5744     InVals.push_back(RetValue);
5745   }
5746 
5747   return Chain;
5748 }
5749 
5750 bool RISCVTargetLowering::CanLowerReturn(
5751     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5752     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5753   SmallVector<CCValAssign, 16> RVLocs;
5754   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5755 
5756   Optional<unsigned> FirstMaskArgument;
5757   if (Subtarget.hasStdExtV())
5758     FirstMaskArgument = preAssignMask(Outs);
5759 
5760   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5761     MVT VT = Outs[i].VT;
5762     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5763     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
5764     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
5765                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
5766                  *this, FirstMaskArgument))
5767       return false;
5768   }
5769   return true;
5770 }
5771 
5772 SDValue
5773 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
5774                                  bool IsVarArg,
5775                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
5776                                  const SmallVectorImpl<SDValue> &OutVals,
5777                                  const SDLoc &DL, SelectionDAG &DAG) const {
5778   const MachineFunction &MF = DAG.getMachineFunction();
5779   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
5780 
5781   // Stores the assignment of the return value to a location.
5782   SmallVector<CCValAssign, 16> RVLocs;
5783 
5784   // Info about the registers and stack slot.
5785   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5786                  *DAG.getContext());
5787 
5788   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5789                     nullptr);
5790 
5791   if (CallConv == CallingConv::GHC && !RVLocs.empty())
5792     report_fatal_error("GHC functions return void only");
5793 
5794   SDValue Glue;
5795   SmallVector<SDValue, 4> RetOps(1, Chain);
5796 
5797   // Copy the result values into the output registers.
5798   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5799     SDValue Val = OutVals[i];
5800     CCValAssign &VA = RVLocs[i];
5801     assert(VA.isRegLoc() && "Can only return in registers!");
5802 
5803     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
5804       // Handle returning f64 on RV32D with a soft float ABI.
5805       assert(VA.isRegLoc() && "Expected return via registers");
5806       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
5807                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
5808       SDValue Lo = SplitF64.getValue(0);
5809       SDValue Hi = SplitF64.getValue(1);
5810       Register RegLo = VA.getLocReg();
5811       assert(RegLo < RISCV::X31 && "Invalid register pair");
5812       Register RegHi = RegLo + 1;
5813 
5814       if (STI.isRegisterReservedByUser(RegLo) ||
5815           STI.isRegisterReservedByUser(RegHi))
5816         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5817             MF.getFunction(),
5818             "Return value register required, but has been reserved."});
5819 
5820       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
5821       Glue = Chain.getValue(1);
5822       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
5823       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
5824       Glue = Chain.getValue(1);
5825       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
5826     } else {
5827       // Handle a 'normal' return.
5828       Val = convertValVTToLocVT(DAG, Val, VA, DL);
5829       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5830 
5831       if (STI.isRegisterReservedByUser(VA.getLocReg()))
5832         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
5833             MF.getFunction(),
5834             "Return value register required, but has been reserved."});
5835 
5836       // Guarantee that all emitted copies are stuck together.
5837       Glue = Chain.getValue(1);
5838       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5839     }
5840   }
5841 
5842   RetOps[0] = Chain; // Update chain.
5843 
5844   // Add the glue node if we have it.
5845   if (Glue.getNode()) {
5846     RetOps.push_back(Glue);
5847   }
5848 
5849   // Interrupt service routines use different return instructions.
5850   const Function &Func = DAG.getMachineFunction().getFunction();
5851   if (Func.hasFnAttribute("interrupt")) {
5852     if (!Func.getReturnType()->isVoidTy())
5853       report_fatal_error(
5854           "Functions with the interrupt attribute must have void return type!");
5855 
5856     MachineFunction &MF = DAG.getMachineFunction();
5857     StringRef Kind =
5858       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
5859 
5860     unsigned RetOpc;
5861     if (Kind == "user")
5862       RetOpc = RISCVISD::URET_FLAG;
5863     else if (Kind == "supervisor")
5864       RetOpc = RISCVISD::SRET_FLAG;
5865     else
5866       RetOpc = RISCVISD::MRET_FLAG;
5867 
5868     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
5869   }
5870 
5871   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
5872 }
5873 
5874 void RISCVTargetLowering::validateCCReservedRegs(
5875     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
5876     MachineFunction &MF) const {
5877   const Function &F = MF.getFunction();
5878   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
5879 
5880   if (llvm::any_of(Regs, [&STI](auto Reg) {
5881         return STI.isRegisterReservedByUser(Reg.first);
5882       }))
5883     F.getContext().diagnose(DiagnosticInfoUnsupported{
5884         F, "Argument register required, but has been reserved."});
5885 }
5886 
5887 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5888   return CI->isTailCall();
5889 }
5890 
5891 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
5892 #define NODE_NAME_CASE(NODE)                                                   \
5893   case RISCVISD::NODE:                                                         \
5894     return "RISCVISD::" #NODE;
5895   // clang-format off
5896   switch ((RISCVISD::NodeType)Opcode) {
5897   case RISCVISD::FIRST_NUMBER:
5898     break;
5899   NODE_NAME_CASE(RET_FLAG)
5900   NODE_NAME_CASE(URET_FLAG)
5901   NODE_NAME_CASE(SRET_FLAG)
5902   NODE_NAME_CASE(MRET_FLAG)
5903   NODE_NAME_CASE(CALL)
5904   NODE_NAME_CASE(SELECT_CC)
5905   NODE_NAME_CASE(BuildPairF64)
5906   NODE_NAME_CASE(SplitF64)
5907   NODE_NAME_CASE(TAIL)
5908   NODE_NAME_CASE(SLLW)
5909   NODE_NAME_CASE(SRAW)
5910   NODE_NAME_CASE(SRLW)
5911   NODE_NAME_CASE(DIVW)
5912   NODE_NAME_CASE(DIVUW)
5913   NODE_NAME_CASE(REMUW)
5914   NODE_NAME_CASE(ROLW)
5915   NODE_NAME_CASE(RORW)
5916   NODE_NAME_CASE(FSLW)
5917   NODE_NAME_CASE(FSRW)
5918   NODE_NAME_CASE(FSL)
5919   NODE_NAME_CASE(FSR)
5920   NODE_NAME_CASE(FMV_H_X)
5921   NODE_NAME_CASE(FMV_X_ANYEXTH)
5922   NODE_NAME_CASE(FMV_W_X_RV64)
5923   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
5924   NODE_NAME_CASE(READ_CYCLE_WIDE)
5925   NODE_NAME_CASE(GREVI)
5926   NODE_NAME_CASE(GREVIW)
5927   NODE_NAME_CASE(GORCI)
5928   NODE_NAME_CASE(GORCIW)
5929   NODE_NAME_CASE(SHFLI)
5930   NODE_NAME_CASE(VMV_V_X_VL)
5931   NODE_NAME_CASE(VFMV_V_F_VL)
5932   NODE_NAME_CASE(VMV_X_S)
5933   NODE_NAME_CASE(VMV_S_XF_VL)
5934   NODE_NAME_CASE(SPLAT_VECTOR_I64)
5935   NODE_NAME_CASE(READ_VLENB)
5936   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
5937   NODE_NAME_CASE(VLEFF)
5938   NODE_NAME_CASE(VLEFF_MASK)
5939   NODE_NAME_CASE(VSLIDEUP_VL)
5940   NODE_NAME_CASE(VSLIDEDOWN_VL)
5941   NODE_NAME_CASE(VID_VL)
5942   NODE_NAME_CASE(VFNCVT_ROD_VL)
5943   NODE_NAME_CASE(VECREDUCE_ADD_VL)
5944   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
5945   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
5946   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
5947   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
5948   NODE_NAME_CASE(VECREDUCE_AND_VL)
5949   NODE_NAME_CASE(VECREDUCE_OR_VL)
5950   NODE_NAME_CASE(VECREDUCE_XOR_VL)
5951   NODE_NAME_CASE(VECREDUCE_FADD_VL)
5952   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
5953   NODE_NAME_CASE(ADD_VL)
5954   NODE_NAME_CASE(AND_VL)
5955   NODE_NAME_CASE(MUL_VL)
5956   NODE_NAME_CASE(OR_VL)
5957   NODE_NAME_CASE(SDIV_VL)
5958   NODE_NAME_CASE(SHL_VL)
5959   NODE_NAME_CASE(SREM_VL)
5960   NODE_NAME_CASE(SRA_VL)
5961   NODE_NAME_CASE(SRL_VL)
5962   NODE_NAME_CASE(SUB_VL)
5963   NODE_NAME_CASE(UDIV_VL)
5964   NODE_NAME_CASE(UREM_VL)
5965   NODE_NAME_CASE(XOR_VL)
5966   NODE_NAME_CASE(FADD_VL)
5967   NODE_NAME_CASE(FSUB_VL)
5968   NODE_NAME_CASE(FMUL_VL)
5969   NODE_NAME_CASE(FDIV_VL)
5970   NODE_NAME_CASE(FNEG_VL)
5971   NODE_NAME_CASE(FABS_VL)
5972   NODE_NAME_CASE(FSQRT_VL)
5973   NODE_NAME_CASE(FMA_VL)
5974   NODE_NAME_CASE(SMIN_VL)
5975   NODE_NAME_CASE(SMAX_VL)
5976   NODE_NAME_CASE(UMIN_VL)
5977   NODE_NAME_CASE(UMAX_VL)
5978   NODE_NAME_CASE(MULHS_VL)
5979   NODE_NAME_CASE(MULHU_VL)
5980   NODE_NAME_CASE(FP_TO_SINT_VL)
5981   NODE_NAME_CASE(FP_TO_UINT_VL)
5982   NODE_NAME_CASE(SINT_TO_FP_VL)
5983   NODE_NAME_CASE(UINT_TO_FP_VL)
5984   NODE_NAME_CASE(FP_EXTEND_VL)
5985   NODE_NAME_CASE(FP_ROUND_VL)
5986   NODE_NAME_CASE(SETCC_VL)
5987   NODE_NAME_CASE(VSELECT_VL)
5988   NODE_NAME_CASE(VMAND_VL)
5989   NODE_NAME_CASE(VMOR_VL)
5990   NODE_NAME_CASE(VMXOR_VL)
5991   NODE_NAME_CASE(VMCLR_VL)
5992   NODE_NAME_CASE(VMSET_VL)
5993   NODE_NAME_CASE(VRGATHER_VX_VL)
5994   NODE_NAME_CASE(VRGATHER_VV_VL)
5995   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
5996   NODE_NAME_CASE(VSEXT_VL)
5997   NODE_NAME_CASE(VZEXT_VL)
5998   NODE_NAME_CASE(VLE_VL)
5999   NODE_NAME_CASE(VSE_VL)
6000   }
6001   // clang-format on
6002   return nullptr;
6003 #undef NODE_NAME_CASE
6004 }
6005 
6006 /// getConstraintType - Given a constraint letter, return the type of
6007 /// constraint it is for this target.
6008 RISCVTargetLowering::ConstraintType
6009 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
6010   if (Constraint.size() == 1) {
6011     switch (Constraint[0]) {
6012     default:
6013       break;
6014     case 'f':
6015       return C_RegisterClass;
6016     case 'I':
6017     case 'J':
6018     case 'K':
6019       return C_Immediate;
6020     case 'A':
6021       return C_Memory;
6022     }
6023   }
6024   return TargetLowering::getConstraintType(Constraint);
6025 }
6026 
6027 std::pair<unsigned, const TargetRegisterClass *>
6028 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
6029                                                   StringRef Constraint,
6030                                                   MVT VT) const {
6031   // First, see if this is a constraint that directly corresponds to a
6032   // RISCV register class.
6033   if (Constraint.size() == 1) {
6034     switch (Constraint[0]) {
6035     case 'r':
6036       return std::make_pair(0U, &RISCV::GPRRegClass);
6037     case 'f':
6038       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
6039         return std::make_pair(0U, &RISCV::FPR16RegClass);
6040       if (Subtarget.hasStdExtF() && VT == MVT::f32)
6041         return std::make_pair(0U, &RISCV::FPR32RegClass);
6042       if (Subtarget.hasStdExtD() && VT == MVT::f64)
6043         return std::make_pair(0U, &RISCV::FPR64RegClass);
6044       break;
6045     default:
6046       break;
6047     }
6048   }
6049 
6050   // Clang will correctly decode the usage of register name aliases into their
6051   // official names. However, other frontends like `rustc` do not. This allows
6052   // users of these frontends to use the ABI names for registers in LLVM-style
6053   // register constraints.
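  // For example, LLVM IR inline asm may carry a constraint string such as
  // "=r,{a0}"; the "{a0}" alias is mapped to X10 here, since the generic
  // lookup only recognizes the architectural register names (e.g. "{x10}").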
6054   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
6055                                .Case("{zero}", RISCV::X0)
6056                                .Case("{ra}", RISCV::X1)
6057                                .Case("{sp}", RISCV::X2)
6058                                .Case("{gp}", RISCV::X3)
6059                                .Case("{tp}", RISCV::X4)
6060                                .Case("{t0}", RISCV::X5)
6061                                .Case("{t1}", RISCV::X6)
6062                                .Case("{t2}", RISCV::X7)
6063                                .Cases("{s0}", "{fp}", RISCV::X8)
6064                                .Case("{s1}", RISCV::X9)
6065                                .Case("{a0}", RISCV::X10)
6066                                .Case("{a1}", RISCV::X11)
6067                                .Case("{a2}", RISCV::X12)
6068                                .Case("{a3}", RISCV::X13)
6069                                .Case("{a4}", RISCV::X14)
6070                                .Case("{a5}", RISCV::X15)
6071                                .Case("{a6}", RISCV::X16)
6072                                .Case("{a7}", RISCV::X17)
6073                                .Case("{s2}", RISCV::X18)
6074                                .Case("{s3}", RISCV::X19)
6075                                .Case("{s4}", RISCV::X20)
6076                                .Case("{s5}", RISCV::X21)
6077                                .Case("{s6}", RISCV::X22)
6078                                .Case("{s7}", RISCV::X23)
6079                                .Case("{s8}", RISCV::X24)
6080                                .Case("{s9}", RISCV::X25)
6081                                .Case("{s10}", RISCV::X26)
6082                                .Case("{s11}", RISCV::X27)
6083                                .Case("{t3}", RISCV::X28)
6084                                .Case("{t4}", RISCV::X29)
6085                                .Case("{t5}", RISCV::X30)
6086                                .Case("{t6}", RISCV::X31)
6087                                .Default(RISCV::NoRegister);
6088   if (XRegFromAlias != RISCV::NoRegister)
6089     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
6090 
6091   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
6092   // TableGen record rather than the AsmName to choose registers for InlineAsm
6093   // constraints, plus we want to match those names to the widest floating point
6094   // register type available, manually select floating point registers here.
6095   //
6096   // The second case is the ABI name of the register, so that frontends can also
6097   // use the ABI names in register constraint lists.
6098   if (Subtarget.hasStdExtF()) {
6099     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
6100                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
6101                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
6102                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
6103                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
6104                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
6105                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
6106                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
6107                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
6108                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
6109                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
6110                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
6111                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
6112                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
6113                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
6114                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
6115                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
6116                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
6117                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
6118                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
6119                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
6120                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
6121                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
6122                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
6123                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
6124                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
6125                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
6126                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
6127                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
6128                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
6129                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
6130                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
6131                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
6132                         .Default(RISCV::NoRegister);
6133     if (FReg != RISCV::NoRegister) {
6134       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
6135       if (Subtarget.hasStdExtD()) {
6136         unsigned RegNo = FReg - RISCV::F0_F;
6137         unsigned DReg = RISCV::F0_D + RegNo;
6138         return std::make_pair(DReg, &RISCV::FPR64RegClass);
6139       }
6140       return std::make_pair(FReg, &RISCV::FPR32RegClass);
6141     }
6142   }
6143 
6144   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6145 }
6146 
6147 unsigned
6148 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
6149   // Currently only support length 1 constraints.
6150   if (ConstraintCode.size() == 1) {
6151     switch (ConstraintCode[0]) {
6152     case 'A':
6153       return InlineAsm::Constraint_A;
6154     default:
6155       break;
6156     }
6157   }
6158 
6159   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
6160 }
6161 
6162 void RISCVTargetLowering::LowerAsmOperandForConstraint(
6163     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
6164     SelectionDAG &DAG) const {
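  // For example, C-level inline asm such as
  //   asm ("addi %0, %1, %2" : "=r"(res) : "r"(a), "I"(imm));
  // reaches this hook with constraint 'I'; the immediate is only accepted if
  // it fits in a 12-bit signed field, otherwise no operand is added and the
  // constraint is rejected.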
6165   // Currently only support length 1 constraints.
6166   if (Constraint.length() == 1) {
6167     switch (Constraint[0]) {
6168     case 'I':
6169       // Validate & create a 12-bit signed immediate operand.
6170       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6171         uint64_t CVal = C->getSExtValue();
6172         if (isInt<12>(CVal))
6173           Ops.push_back(
6174               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
6175       }
6176       return;
6177     case 'J':
6178       // Validate & create an integer zero operand.
6179       if (auto *C = dyn_cast<ConstantSDNode>(Op))
6180         if (C->getZExtValue() == 0)
6181           Ops.push_back(
6182               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
6183       return;
6184     case 'K':
6185       // Validate & create a 5-bit unsigned immediate operand.
6186       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6187         uint64_t CVal = C->getZExtValue();
6188         if (isUInt<5>(CVal))
6189           Ops.push_back(
6190               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
6191       }
6192       return;
6193     default:
6194       break;
6195     }
6196   }
6197   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6198 }
6199 
6200 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
6201                                                    Instruction *Inst,
6202                                                    AtomicOrdering Ord) const {
6203   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
6204     return Builder.CreateFence(Ord);
6205   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
6206     return Builder.CreateFence(AtomicOrdering::Release);
6207   return nullptr;
6208 }
6209 
6210 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
6211                                                     Instruction *Inst,
6212                                                     AtomicOrdering Ord) const {
6213   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
6214     return Builder.CreateFence(AtomicOrdering::Acquire);
6215   return nullptr;
6216 }
6217 
6218 TargetLowering::AtomicExpansionKind
6219 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
6220   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
6221   // point operations can't be used in an lr/sc sequence without breaking the
6222   // forward-progress guarantee.
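  // For example, 'atomicrmw fadd' is expanded by AtomicExpandPass into a
  // load / fadd / compare-exchange retry loop rather than an LR/SC loop that
  // contains the floating point operation.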
6223   if (AI->isFloatingPointOperation())
6224     return AtomicExpansionKind::CmpXChg;
6225 
6226   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
6227   if (Size == 8 || Size == 16)
6228     return AtomicExpansionKind::MaskedIntrinsic;
6229   return AtomicExpansionKind::None;
6230 }
6231 
6232 static Intrinsic::ID
6233 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
6234   if (XLen == 32) {
6235     switch (BinOp) {
6236     default:
6237       llvm_unreachable("Unexpected AtomicRMW BinOp");
6238     case AtomicRMWInst::Xchg:
6239       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
6240     case AtomicRMWInst::Add:
6241       return Intrinsic::riscv_masked_atomicrmw_add_i32;
6242     case AtomicRMWInst::Sub:
6243       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
6244     case AtomicRMWInst::Nand:
6245       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
6246     case AtomicRMWInst::Max:
6247       return Intrinsic::riscv_masked_atomicrmw_max_i32;
6248     case AtomicRMWInst::Min:
6249       return Intrinsic::riscv_masked_atomicrmw_min_i32;
6250     case AtomicRMWInst::UMax:
6251       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
6252     case AtomicRMWInst::UMin:
6253       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
6254     }
6255   }
6256 
6257   if (XLen == 64) {
6258     switch (BinOp) {
6259     default:
6260       llvm_unreachable("Unexpected AtomicRMW BinOp");
6261     case AtomicRMWInst::Xchg:
6262       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
6263     case AtomicRMWInst::Add:
6264       return Intrinsic::riscv_masked_atomicrmw_add_i64;
6265     case AtomicRMWInst::Sub:
6266       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
6267     case AtomicRMWInst::Nand:
6268       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
6269     case AtomicRMWInst::Max:
6270       return Intrinsic::riscv_masked_atomicrmw_max_i64;
6271     case AtomicRMWInst::Min:
6272       return Intrinsic::riscv_masked_atomicrmw_min_i64;
6273     case AtomicRMWInst::UMax:
6274       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
6275     case AtomicRMWInst::UMin:
6276       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
6277     }
6278   }
6279 
6280   llvm_unreachable("Unexpected XLen\n");
6281 }
6282 
6283 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
6284     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
6285     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
6286   unsigned XLen = Subtarget.getXLen();
6287   Value *Ordering =
6288       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
6289   Type *Tys[] = {AlignedAddr->getType()};
6290   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
6291       AI->getModule(),
6292       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
6293 
6294   if (XLen == 64) {
6295     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6296     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6297     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6298   }
6299 
6300   Value *Result;
6301 
6302   // Must pass the shift amount needed to sign extend the loaded value prior
6303   // to performing a signed comparison for min/max. ShiftAmt is the number of
6304   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
6305   // is the number of bits to left+right shift the value in order to
6306   // sign-extend.
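  // For example, with XLen == 32 and an i8 atomicrmw min whose byte sits at
  // offset 2 within its aligned word, ShiftAmt is 16 and ValWidth is 8, so
  // the shift amount passed is 32 - 16 - 8 == 8.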
6307   if (AI->getOperation() == AtomicRMWInst::Min ||
6308       AI->getOperation() == AtomicRMWInst::Max) {
6309     const DataLayout &DL = AI->getModule()->getDataLayout();
6310     unsigned ValWidth =
6311         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6312     Value *SextShamt =
6313         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
6314     Result = Builder.CreateCall(LrwOpScwLoop,
6315                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6316   } else {
6317     Result =
6318         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6319   }
6320 
6321   if (XLen == 64)
6322     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6323   return Result;
6324 }
6325 
6326 TargetLowering::AtomicExpansionKind
6327 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
6328     AtomicCmpXchgInst *CI) const {
6329   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
6330   if (Size == 8 || Size == 16)
6331     return AtomicExpansionKind::MaskedIntrinsic;
6332   return AtomicExpansionKind::None;
6333 }
6334 
6335 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
6336     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
6337     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
6338   unsigned XLen = Subtarget.getXLen();
6339   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
6340   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
6341   if (XLen == 64) {
6342     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
6343     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
6344     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6345     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
6346   }
6347   Type *Tys[] = {AlignedAddr->getType()};
6348   Function *MaskedCmpXchg =
6349       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
6350   Value *Result = Builder.CreateCall(
6351       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
6352   if (XLen == 64)
6353     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6354   return Result;
6355 }
6356 
6357 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
6358                                                      EVT VT) const {
6359   VT = VT.getScalarType();
6360 
6361   if (!VT.isSimple())
6362     return false;
6363 
6364   switch (VT.getSimpleVT().SimpleTy) {
6365   case MVT::f16:
6366     return Subtarget.hasStdExtZfh();
6367   case MVT::f32:
6368     return Subtarget.hasStdExtF();
6369   case MVT::f64:
6370     return Subtarget.hasStdExtD();
6371   default:
6372     break;
6373   }
6374 
6375   return false;
6376 }
6377 
6378 Register RISCVTargetLowering::getExceptionPointerRegister(
6379     const Constant *PersonalityFn) const {
6380   return RISCV::X10;
6381 }
6382 
6383 Register RISCVTargetLowering::getExceptionSelectorRegister(
6384     const Constant *PersonalityFn) const {
6385   return RISCV::X11;
6386 }
6387 
6388 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument
  // or return value is of f32 type under the LP64 ABI.
6391   RISCVABI::ABI ABI = Subtarget.getTargetABI();
6392   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
6393     return false;
6394 
6395   return true;
6396 }

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
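  // The RV64 ABIs keep 32-bit integers sign-extended to 64 bits in registers,
  // so force sign extension of i32 libcall arguments and return values.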
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
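      // e.g. 3 * x becomes (x << 1) + x and 7 * x becomes (x << 3) - x.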
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
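      // e.g. 6144 * x (6144 == 3 << 11) becomes ((x << 1) + x) << 11, which
      // avoids the LUI/ADDI pair otherwise needed to materialise the constant.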
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

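  // Accesses that are at least aligned to the element type are allowed (and
  // reported as fast); anything less remains disallowed.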
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

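// Pull in the TableGen-generated register name matchers
// (MatchRegisterName/MatchRegisterAltName) used by getRegisterByName below.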
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
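  // Only registers that are reserved, either unconditionally or explicitly by
  // the user, may be accessed by name.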
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

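// Instantiate the TableGen-generated searchable table describing the RVV
// intrinsics.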
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm