//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    } else {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
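      // (For instance, on RV32 an i64 scalar is passed as two 32-bit GPR
      // halves, so splatting it into a vXi64 vector cannot always be a single
      // vmv.v.x and has to be custom-lowered instead.)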
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64)
        setOperationAction(ISD::ABS, VT, Custom);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element
      // type sizes are within one power-of-two of each other. Therefore
      // conversions between vXf16 and vXf64 must be lowered as sequences
      // which convert via vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
          setTruncStoreAction(VT, OtherVT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
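        // (For example, a fixed-length v8i16 value is modelled as an
        // EXTRACT_SUBVECTOR at index 0 of its scalable container type, and the
        // opposite direction uses INSERT_SUBVECTOR into an undef container;
        // see convertToScalableVector/convertFromScalableVector later in this
        // file.)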
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        // Operations below are different between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding
        // custom nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
  if (Subtarget.hasStdExtV())
    setTargetDAGCombine(ISD::FCOPYSIGN);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
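  // (Background note: RISC-V loads and stores address memory only as a base
  // register plus a signed 12-bit immediate, e.g. "lw a0, -4(a1)"; there is
  // no reg+reg or scaled addressing, which is what the checks below enforce.)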
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
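// For example, (setcc a, b, setgt) is normalised to (setcc b, a, setlt),
// which maps onto the native BLT comparison.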
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVLMUL::LMUL_F8;
  case 16:
    return RISCVVLMUL::LMUL_F4;
  case 32:
    return RISCVVLMUL::LMUL_F2;
  case 64:
    return RISCVVLMUL::LMUL_1;
  case 128:
    return RISCVVLMUL::LMUL_2;
  case 256:
    return RISCVVLMUL::LMUL_4;
  case 512:
    return RISCVVLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVLMUL::LMUL_F8:
  case RISCVVLMUL::LMUL_F4:
  case RISCVVLMUL::LMUL_F2:
  case RISCVVLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVVLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVVLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Return the largest legal scalable vector type that matches VT's element
// type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1: {
    // Masks are calculated assuming 8-bit elements since that's when we need
    // the most elements.
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
  }
  }
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(
    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

// Grow V to consume an entire RVV register.
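// (Illustrative example: a fixed-length v4i32 value V becomes
// (INSERT_SUBVECTOR undef, V, 0), placing V at element 0 of an otherwise
// undef scalable container chosen by getContainerForFixedLengthVector.)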
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = VecVT.isFixedLengthVector()
                   ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
                   : DAG.getRegister(RISCV::X0, XLenVT);
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}

// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}

// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either are (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  return false;
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  if (VT.getVectorElementType() == MVT::i1) {
    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
    }

    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
    }

    return SDValue();
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
  if (VT.isInteger()) {
    bool IsVID = true;
    for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
      IsVID &= Op.getOperand(i).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(i)) &&
                Op.getConstantOperandVal(i) == i);

    if (IsVID) {
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
      return convertFromScalableVector(VT, VID, DAG, Subtarget);
    }
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
          DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      SDValue Mask, VL;
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  if (VT.isScalableVector())
    return DAG.getFPExtendOrRound(Op, DL, VT);
  assert(VT.isFixedLengthVector() &&
         "Unexpected value type for RVV FP extend/round lowering");
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
                        ? RISCVISD::FP_EXTEND_VL
                        : RISCVISD::FP_ROUND_VL;
  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDValue Op0 = Op.getOperand(0);
    // We can handle fixed length vector bitcasts with a simple replacement
    // in isel.
    if (Op.getValueType().isFixedLengthVector()) {
      if (Op0.getValueType().isFixedLengthVector())
        return Op;
      return SDValue();
    }
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
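    // (For instance, on RV64 the shift amount is masked to 6 bits here via
    // AND with 63, while the FSL/FSR instructions themselves consume a 7-bit
    // funnel-shift amount.)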
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
    // truncate by one power of two at a time.
    MVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    MVT ContainerVT = SrcVT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(SrcVT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }

    SDValue Result = Src;
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = ContainerVT.getVectorElementCount();
    do {
      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                           Mask, VL);
    } while (SrcEltVT != DstEltVT);

    if (SrcVT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size as the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (SrcVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      MVT SrcContainerVT =
          ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16) {
      // For scalable vectors, we only need to close the gap between
      // vXf16->vXf64.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    MVT InterVT = VT.changeVectorElementType(MVT::f32);
    MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateExtend = getRVVFPExtendOrRound(
        Src, InterVT, InterContainerVT, DL, DAG, Subtarget);

    SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
                                           DL, DAG, Subtarget);
    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Extend, DAG, Subtarget);
    return Extend;
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size as the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();

    // Prepare any fixed-length vector operands.
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    }

    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64) {
      // For scalable vectors, we only need to close the gap between
      // vXf64<->vXf16.
      if (!VT.isFixedLengthVector())
        return Op;
      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
      return convertFromScalableVector(VT, Src, DAG, Subtarget);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
                                          DL, DAG, Subtarget);

    if (VT.isFixedLengthVector())
      return convertFromScalableVector(VT, Round, DAG, Subtarget);
    return Round;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions as well as the halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::FP_TO_SINT_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::FP_TO_UINT_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT, SrcContainerVT;
    // Derive the reference container type from the larger vector type.
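    // (For example, when lowering a v4f64 -> v4i32 fp_to_sint, the f64 source
    // is the wider type, so its container fixes the LMUL and the i32 result
    // container is derived from it by changing the element type.)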
1503 if (SrcEltSize > EltSize) { 1504 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 1505 ContainerVT = 1506 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 1507 } else { 1508 ContainerVT = getContainerForFixedLengthVector(VT); 1509 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 1510 } 1511 1512 SDValue Mask, VL; 1513 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1514 1515 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 1516 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 1517 return convertFromScalableVector(VT, Src, DAG, Subtarget); 1518 } 1519 case ISD::VECREDUCE_ADD: 1520 case ISD::VECREDUCE_UMAX: 1521 case ISD::VECREDUCE_SMAX: 1522 case ISD::VECREDUCE_UMIN: 1523 case ISD::VECREDUCE_SMIN: 1524 case ISD::VECREDUCE_AND: 1525 case ISD::VECREDUCE_OR: 1526 case ISD::VECREDUCE_XOR: 1527 return lowerVECREDUCE(Op, DAG); 1528 case ISD::VECREDUCE_FADD: 1529 case ISD::VECREDUCE_SEQ_FADD: 1530 return lowerFPVECREDUCE(Op, DAG); 1531 case ISD::INSERT_SUBVECTOR: 1532 return lowerINSERT_SUBVECTOR(Op, DAG); 1533 case ISD::EXTRACT_SUBVECTOR: 1534 return lowerEXTRACT_SUBVECTOR(Op, DAG); 1535 case ISD::VECTOR_REVERSE: 1536 return lowerVECTOR_REVERSE(Op, DAG); 1537 case ISD::BUILD_VECTOR: 1538 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1539 case ISD::VECTOR_SHUFFLE: 1540 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 1541 case ISD::CONCAT_VECTORS: { 1542 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 1543 // better than going through the stack, as the default expansion does. 1544 SDLoc DL(Op); 1545 MVT VT = Op.getSimpleValueType(); 1546 assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering"); 1547 unsigned NumOpElts = 1548 Op.getOperand(0).getSimpleValueType().getVectorNumElements(); 1549 SDValue Vec = DAG.getUNDEF(VT); 1550 for (const auto &OpIdx : enumerate(Op->ops())) 1551 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 1552 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 1553 return Vec; 1554 } 1555 case ISD::LOAD: 1556 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1557 case ISD::STORE: 1558 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1559 case ISD::SETCC: 1560 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 1561 case ISD::ADD: 1562 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1563 case ISD::SUB: 1564 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1565 case ISD::MUL: 1566 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1567 case ISD::MULHS: 1568 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 1569 case ISD::MULHU: 1570 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 1571 case ISD::AND: 1572 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 1573 RISCVISD::AND_VL); 1574 case ISD::OR: 1575 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 1576 RISCVISD::OR_VL); 1577 case ISD::XOR: 1578 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 1579 RISCVISD::XOR_VL); 1580 case ISD::SDIV: 1581 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1582 case ISD::SREM: 1583 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1584 case ISD::UDIV: 1585 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1586 case ISD::UREM: 1587 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1588 case ISD::SHL: 1589 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1590 case ISD::SRA: 1591 return lowerToScalableOp(Op, DAG, 
RISCVISD::SRA_VL); 1592 case ISD::SRL: 1593 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1594 case ISD::FADD: 1595 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1596 case ISD::FSUB: 1597 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1598 case ISD::FMUL: 1599 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1600 case ISD::FDIV: 1601 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1602 case ISD::FNEG: 1603 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1604 case ISD::FABS: 1605 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 1606 case ISD::FSQRT: 1607 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 1608 case ISD::FMA: 1609 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1610 case ISD::SMIN: 1611 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 1612 case ISD::SMAX: 1613 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 1614 case ISD::UMIN: 1615 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 1616 case ISD::UMAX: 1617 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 1618 case ISD::ABS: 1619 return lowerABS(Op, DAG); 1620 case ISD::VSELECT: 1621 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 1622 } 1623 } 1624 1625 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1626 SelectionDAG &DAG, unsigned Flags) { 1627 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1628 } 1629 1630 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1631 SelectionDAG &DAG, unsigned Flags) { 1632 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1633 Flags); 1634 } 1635 1636 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1637 SelectionDAG &DAG, unsigned Flags) { 1638 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1639 N->getOffset(), Flags); 1640 } 1641 1642 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1643 SelectionDAG &DAG, unsigned Flags) { 1644 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1645 } 1646 1647 template <class NodeTy> 1648 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1649 bool IsLocal) const { 1650 SDLoc DL(N); 1651 EVT Ty = getPointerTy(DAG.getDataLayout()); 1652 1653 if (isPositionIndependent()) { 1654 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1655 if (IsLocal) 1656 // Use PC-relative addressing to access the symbol. This generates the 1657 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1658 // %pcrel_lo(auipc)). 1659 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1660 1661 // Use PC-relative addressing to access the GOT for this symbol, then load 1662 // the address from the GOT. This generates the pattern (PseudoLA sym), 1663 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1664 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1665 } 1666 1667 switch (getTargetMachine().getCodeModel()) { 1668 default: 1669 report_fatal_error("Unsupported code model for lowering"); 1670 case CodeModel::Small: { 1671 // Generate a sequence for accessing addresses within the first 2 GiB of 1672 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
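// Illustrative sketch (register choice is arbitrary): for a symbol sym this
// materialises as
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)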
1673 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1674 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1675 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1676 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1677 } 1678 case CodeModel::Medium: { 1679 // Generate a sequence for accessing addresses within any 2GiB range within 1680 // the address space. This generates the pattern (PseudoLLA sym), which 1681 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1682 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1683 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1684 } 1685 } 1686 } 1687 1688 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1689 SelectionDAG &DAG) const { 1690 SDLoc DL(Op); 1691 EVT Ty = Op.getValueType(); 1692 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1693 int64_t Offset = N->getOffset(); 1694 MVT XLenVT = Subtarget.getXLenVT(); 1695 1696 const GlobalValue *GV = N->getGlobal(); 1697 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1698 SDValue Addr = getAddr(N, DAG, IsLocal); 1699 1700 // In order to maximise the opportunity for common subexpression elimination, 1701 // emit a separate ADD node for the global address offset instead of folding 1702 // it in the global address node. Later peephole optimisations may choose to 1703 // fold it back in when profitable. 1704 if (Offset != 0) 1705 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1706 DAG.getConstant(Offset, DL, XLenVT)); 1707 return Addr; 1708 } 1709 1710 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1711 SelectionDAG &DAG) const { 1712 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1713 1714 return getAddr(N, DAG); 1715 } 1716 1717 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1718 SelectionDAG &DAG) const { 1719 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1720 1721 return getAddr(N, DAG); 1722 } 1723 1724 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1725 SelectionDAG &DAG) const { 1726 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1727 1728 return getAddr(N, DAG); 1729 } 1730 1731 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1732 SelectionDAG &DAG, 1733 bool UseGOT) const { 1734 SDLoc DL(N); 1735 EVT Ty = getPointerTy(DAG.getDataLayout()); 1736 const GlobalValue *GV = N->getGlobal(); 1737 MVT XLenVT = Subtarget.getXLenVT(); 1738 1739 if (UseGOT) { 1740 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1741 // load the address from the GOT and add the thread pointer. This generates 1742 // the pattern (PseudoLA_TLS_IE sym), which expands to 1743 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1744 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1745 SDValue Load = 1746 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1747 1748 // Add the thread pointer. 1749 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1750 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1751 } 1752 1753 // Generate a sequence for accessing the address relative to the thread 1754 // pointer, with the appropriate adjustment for the thread pointer offset. 
1755 // This generates the pattern 1756 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1757 SDValue AddrHi = 1758 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1759 SDValue AddrAdd = 1760 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1761 SDValue AddrLo = 1762 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1763 1764 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1765 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1766 SDValue MNAdd = SDValue( 1767 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1768 0); 1769 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1770 } 1771 1772 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1773 SelectionDAG &DAG) const { 1774 SDLoc DL(N); 1775 EVT Ty = getPointerTy(DAG.getDataLayout()); 1776 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1777 const GlobalValue *GV = N->getGlobal(); 1778 1779 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1780 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1781 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1782 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1783 SDValue Load = 1784 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1785 1786 // Prepare argument list to generate call. 1787 ArgListTy Args; 1788 ArgListEntry Entry; 1789 Entry.Node = Load; 1790 Entry.Ty = CallTy; 1791 Args.push_back(Entry); 1792 1793 // Setup call to __tls_get_addr. 1794 TargetLowering::CallLoweringInfo CLI(DAG); 1795 CLI.setDebugLoc(DL) 1796 .setChain(DAG.getEntryNode()) 1797 .setLibCallee(CallingConv::C, CallTy, 1798 DAG.getExternalSymbol("__tls_get_addr", Ty), 1799 std::move(Args)); 1800 1801 return LowerCallTo(CLI).first; 1802 } 1803 1804 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1805 SelectionDAG &DAG) const { 1806 SDLoc DL(Op); 1807 EVT Ty = Op.getValueType(); 1808 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1809 int64_t Offset = N->getOffset(); 1810 MVT XLenVT = Subtarget.getXLenVT(); 1811 1812 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1813 1814 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1815 CallingConv::GHC) 1816 report_fatal_error("In GHC calling convention TLS is not supported"); 1817 1818 SDValue Addr; 1819 switch (Model) { 1820 case TLSModel::LocalExec: 1821 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1822 break; 1823 case TLSModel::InitialExec: 1824 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1825 break; 1826 case TLSModel::LocalDynamic: 1827 case TLSModel::GeneralDynamic: 1828 Addr = getDynamicTLSAddr(N, DAG); 1829 break; 1830 } 1831 1832 // In order to maximise the opportunity for common subexpression elimination, 1833 // emit a separate ADD node for the global address offset instead of folding 1834 // it in the global address node. Later peephole optimisations may choose to 1835 // fold it back in when profitable. 
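// Illustrative sketch (hypothetical source): for a use of `@tls_var + 8` the
// +8 is emitted as a separate (add addr, 8) rather than being folded into the
// relocated TLS address itself.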
1836 if (Offset != 0) 1837 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1838 DAG.getConstant(Offset, DL, XLenVT)); 1839 return Addr; 1840 } 1841 1842 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1843 SDValue CondV = Op.getOperand(0); 1844 SDValue TrueV = Op.getOperand(1); 1845 SDValue FalseV = Op.getOperand(2); 1846 SDLoc DL(Op); 1847 MVT XLenVT = Subtarget.getXLenVT(); 1848 1849 // If the result type is XLenVT and CondV is the output of a SETCC node 1850 // which also operated on XLenVT inputs, then merge the SETCC node into the 1851 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1852 // compare+branch instructions. i.e.: 1853 // (select (setcc lhs, rhs, cc), truev, falsev) 1854 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1855 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1856 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1857 SDValue LHS = CondV.getOperand(0); 1858 SDValue RHS = CondV.getOperand(1); 1859 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1860 ISD::CondCode CCVal = CC->get(); 1861 1862 normaliseSetCC(LHS, RHS, CCVal); 1863 1864 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1865 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1866 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1867 } 1868 1869 // Otherwise: 1870 // (select condv, truev, falsev) 1871 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1872 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1873 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1874 1875 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1876 1877 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1878 } 1879 1880 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1881 MachineFunction &MF = DAG.getMachineFunction(); 1882 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1883 1884 SDLoc DL(Op); 1885 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1886 getPointerTy(MF.getDataLayout())); 1887 1888 // vastart just stores the address of the VarArgsFrameIndex slot into the 1889 // memory location argument. 
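// In other words (descriptive note): this is a single XLEN-sized store of the
// VarArgsFrameIndex address to the va_list pointer given as operand 1.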
1890 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1891 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1892 MachinePointerInfo(SV)); 1893 } 1894 1895 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1896 SelectionDAG &DAG) const { 1897 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1898 MachineFunction &MF = DAG.getMachineFunction(); 1899 MachineFrameInfo &MFI = MF.getFrameInfo(); 1900 MFI.setFrameAddressIsTaken(true); 1901 Register FrameReg = RI.getFrameRegister(MF); 1902 int XLenInBytes = Subtarget.getXLen() / 8; 1903 1904 EVT VT = Op.getValueType(); 1905 SDLoc DL(Op); 1906 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1907 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1908 while (Depth--) { 1909 int Offset = -(XLenInBytes * 2); 1910 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1911 DAG.getIntPtrConstant(Offset, DL)); 1912 FrameAddr = 1913 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1914 } 1915 return FrameAddr; 1916 } 1917 1918 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1919 SelectionDAG &DAG) const { 1920 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1921 MachineFunction &MF = DAG.getMachineFunction(); 1922 MachineFrameInfo &MFI = MF.getFrameInfo(); 1923 MFI.setReturnAddressIsTaken(true); 1924 MVT XLenVT = Subtarget.getXLenVT(); 1925 int XLenInBytes = Subtarget.getXLen() / 8; 1926 1927 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1928 return SDValue(); 1929 1930 EVT VT = Op.getValueType(); 1931 SDLoc DL(Op); 1932 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1933 if (Depth) { 1934 int Off = -XLenInBytes; 1935 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1936 SDValue Offset = DAG.getConstant(Off, DL, VT); 1937 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1938 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1939 MachinePointerInfo()); 1940 } 1941 1942 // Return the value of the return address register, marking it an implicit 1943 // live-in. 
1944 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1945 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1946 } 1947 1948 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1949 SelectionDAG &DAG) const { 1950 SDLoc DL(Op); 1951 SDValue Lo = Op.getOperand(0); 1952 SDValue Hi = Op.getOperand(1); 1953 SDValue Shamt = Op.getOperand(2); 1954 EVT VT = Lo.getValueType(); 1955 1956 // if Shamt-XLEN < 0: // Shamt < XLEN 1957 // Lo = Lo << Shamt 1958 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1959 // else: 1960 // Lo = 0 1961 // Hi = Lo << (Shamt-XLEN) 1962 1963 SDValue Zero = DAG.getConstant(0, DL, VT); 1964 SDValue One = DAG.getConstant(1, DL, VT); 1965 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1966 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1967 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1968 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1969 1970 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1971 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1972 SDValue ShiftRightLo = 1973 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1974 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1975 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1976 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1977 1978 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1979 1980 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1981 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1982 1983 SDValue Parts[2] = {Lo, Hi}; 1984 return DAG.getMergeValues(Parts, DL); 1985 } 1986 1987 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1988 bool IsSRA) const { 1989 SDLoc DL(Op); 1990 SDValue Lo = Op.getOperand(0); 1991 SDValue Hi = Op.getOperand(1); 1992 SDValue Shamt = Op.getOperand(2); 1993 EVT VT = Lo.getValueType(); 1994 1995 // SRA expansion: 1996 // if Shamt-XLEN < 0: // Shamt < XLEN 1997 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1998 // Hi = Hi >>s Shamt 1999 // else: 2000 // Lo = Hi >>s (Shamt-XLEN); 2001 // Hi = Hi >>s (XLEN-1) 2002 // 2003 // SRL expansion: 2004 // if Shamt-XLEN < 0: // Shamt < XLEN 2005 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 2006 // Hi = Hi >>u Shamt 2007 // else: 2008 // Lo = Hi >>u (Shamt-XLEN); 2009 // Hi = 0; 2010 2011 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2012 2013 SDValue Zero = DAG.getConstant(0, DL, VT); 2014 SDValue One = DAG.getConstant(1, DL, VT); 2015 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 2016 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 2017 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 2018 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 2019 2020 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2021 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2022 SDValue ShiftLeftHi = 2023 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 2024 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2025 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2026 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 2027 SDValue HiFalse = 2028 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 2029 2030 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 2031 2032 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2033 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2034 2035 SDValue Parts[2] = {Lo, Hi}; 2036 return DAG.getMergeValues(Parts, DL); 2037 } 2038 2039 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 2040 // illegal (currently only vXi64 RV32). 2041 // FIXME: We could also catch non-constant sign-extended i32 values and lower 2042 // them to SPLAT_VECTOR_I64 2043 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 2044 SelectionDAG &DAG) const { 2045 SDLoc DL(Op); 2046 EVT VecVT = Op.getValueType(); 2047 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 2048 "Unexpected SPLAT_VECTOR lowering"); 2049 SDValue SplatVal = Op.getOperand(0); 2050 2051 // If we can prove that the value is a sign-extended 32-bit value, lower this 2052 // as a custom node in order to try and match RVV vector/scalar instructions. 2053 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 2054 if (isInt<32>(CVal->getSExtValue())) 2055 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2056 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 2057 } 2058 2059 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 2060 SplatVal.getOperand(0).getValueType() == MVT::i32) { 2061 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2062 SplatVal.getOperand(0)); 2063 } 2064 2065 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 2066 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 2067 // vmv.v.x vX, hi 2068 // vsll.vx vX, vX, /*32*/ 2069 // vmv.v.x vY, lo 2070 // vsll.vx vY, vY, /*32*/ 2071 // vsrl.vx vY, vY, /*32*/ 2072 // vor.vv vX, vX, vY 2073 SDValue One = DAG.getConstant(1, DL, MVT::i32); 2074 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 2075 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 2076 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 2077 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 2078 2079 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 2080 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 2081 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 2082 2083 if (isNullConstant(Hi)) 2084 return Lo; 2085 2086 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 2087 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 2088 2089 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 2090 } 2091 2092 // Custom-lower extensions from mask vectors by using a vselect either with 1 2093 // for zero/any-extension or -1 for sign-extension: 2094 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 2095 // Note that any-extension is lowered identically to zero-extension. 
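// For example (illustrative sketch):
//   (v4i32 = sext v4i1 %m) -> (v4i32 = vselect %m, (splat -1), (splat 0))
//   (v4i32 = zext v4i1 %m) -> (v4i32 = vselect %m, (splat  1), (splat 0))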
2096 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 2097 int64_t ExtTrueVal) const { 2098 SDLoc DL(Op); 2099 MVT VecVT = Op.getSimpleValueType(); 2100 SDValue Src = Op.getOperand(0); 2101 // Only custom-lower extensions from mask types 2102 assert(Src.getValueType().isVector() && 2103 Src.getValueType().getVectorElementType() == MVT::i1); 2104 2105 MVT XLenVT = Subtarget.getXLenVT(); 2106 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 2107 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 2108 2109 if (VecVT.isScalableVector()) { 2110 // Be careful not to introduce illegal scalar types at this stage, and be 2111 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 2112 // illegal and must be expanded. Since we know that the constants are 2113 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 2114 bool IsRV32E64 = 2115 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 2116 2117 if (!IsRV32E64) { 2118 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 2119 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 2120 } else { 2121 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 2122 SplatTrueVal = 2123 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 2124 } 2125 2126 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 2127 } 2128 2129 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 2130 MVT I1ContainerVT = 2131 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2132 2133 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 2134 2135 SDValue Mask, VL; 2136 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2137 2138 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); 2139 SplatTrueVal = 2140 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); 2141 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 2142 SplatTrueVal, SplatZero, VL); 2143 2144 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 2145 } 2146 2147 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 2148 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 2149 MVT ExtVT = Op.getSimpleValueType(); 2150 // Only custom-lower extensions from fixed-length vector types. 2151 if (!ExtVT.isFixedLengthVector()) 2152 return Op; 2153 MVT VT = Op.getOperand(0).getSimpleValueType(); 2154 // Grab the canonical container type for the extended type. Infer the smaller 2155 // type from that to ensure the same number of vector elements, as we know 2156 // the LMUL will be sufficient to hold the smaller type. 2157 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 2158 // Get the extended container type manually to ensure the same number of 2159 // vector elements between source and dest. 
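// Illustrative sketch (container choice depends on VLEN): for v4i32 = zext
// v4i8 with an nxv2i32 extended container, the source container is built as
// nxv2i8 so that both sides keep the same element count.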
2160 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 2161 ContainerExtVT.getVectorElementCount()); 2162 2163 SDValue Op1 = 2164 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2165 2166 SDLoc DL(Op); 2167 SDValue Mask, VL; 2168 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2169 2170 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 2171 2172 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 2173 } 2174 2175 // Custom-lower truncations from vectors to mask vectors by using a mask and a 2176 // setcc operation: 2177 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 2178 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 2179 SelectionDAG &DAG) const { 2180 SDLoc DL(Op); 2181 EVT MaskVT = Op.getValueType(); 2182 // Only expect to custom-lower truncations to mask types 2183 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 2184 "Unexpected type for vector mask lowering"); 2185 SDValue Src = Op.getOperand(0); 2186 MVT VecVT = Src.getSimpleValueType(); 2187 2188 // If this is a fixed vector, we need to convert it to a scalable vector. 2189 MVT ContainerVT = VecVT; 2190 if (VecVT.isFixedLengthVector()) { 2191 ContainerVT = getContainerForFixedLengthVector(VecVT); 2192 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2193 } 2194 2195 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 2196 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2197 2198 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 2199 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 2200 2201 if (VecVT.isScalableVector()) { 2202 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 2203 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 2204 } 2205 2206 SDValue Mask, VL; 2207 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2208 2209 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2210 SDValue Trunc = 2211 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 2212 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 2213 DAG.getCondCode(ISD::SETNE), Mask, VL); 2214 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 2215 } 2216 2217 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 2218 SelectionDAG &DAG) const { 2219 SDLoc DL(Op); 2220 MVT VecVT = Op.getSimpleValueType(); 2221 SDValue Vec = Op.getOperand(0); 2222 SDValue Val = Op.getOperand(1); 2223 SDValue Idx = Op.getOperand(2); 2224 2225 MVT ContainerVT = VecVT; 2226 // If the operand is a fixed-length vector, convert to a scalable one. 2227 if (VecVT.isFixedLengthVector()) { 2228 ContainerVT = getContainerForFixedLengthVector(VecVT); 2229 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2230 } 2231 2232 SDValue Mask, VL; 2233 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2234 2235 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 2236 // first slid down into position, the value is inserted into the first 2237 // position, and the vector is slid back up. We do this to simplify patterns. 
2238 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 2239 if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) { 2240 if (isNullConstant(Idx)) 2241 return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL); 2242 SDValue Slidedown = 2243 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2244 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2245 SDValue InsertElt0 = 2246 DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL); 2247 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0, 2248 Idx, Mask, VL); 2249 } 2250 2251 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 2252 // is illegal (currently only vXi64 RV32). 2253 // Since there is no easy way of getting a single element into a vector when 2254 // XLEN<SEW, we lower the operation to the following sequence: 2255 // splat vVal, rVal 2256 // vid.v vVid 2257 // vmseq.vx mMask, vVid, rIdx 2258 // vmerge.vvm vDest, vSrc, vVal, mMask 2259 // This essentially merges the original vector with the inserted element by 2260 // using a mask whose only set bit is that corresponding to the insert 2261 // index. 2262 SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val); 2263 SDValue SplattedIdx = 2264 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL); 2265 2266 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); 2267 auto SetCCVT = 2268 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT); 2269 SDValue SelectCond = 2270 DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx, 2271 DAG.getCondCode(ISD::SETEQ), Mask, VL); 2272 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, 2273 SelectCond, SplattedVal, Vec, VL); 2274 if (!VecVT.isFixedLengthVector()) 2275 return Select; 2276 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 2277 } 2278 2279 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 2280 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 2281 // types this is done using VMV_X_S to allow us to glean information about the 2282 // sign bits of the result. 2283 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 2284 SelectionDAG &DAG) const { 2285 SDLoc DL(Op); 2286 SDValue Idx = Op.getOperand(1); 2287 SDValue Vec = Op.getOperand(0); 2288 EVT EltVT = Op.getValueType(); 2289 MVT VecVT = Vec.getSimpleValueType(); 2290 MVT XLenVT = Subtarget.getXLenVT(); 2291 2292 // If this is a fixed vector, we need to convert it to a scalable vector. 2293 MVT ContainerVT = VecVT; 2294 if (VecVT.isFixedLengthVector()) { 2295 ContainerVT = getContainerForFixedLengthVector(VecVT); 2296 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2297 } 2298 2299 // If the index is 0, the vector is already in the right position. 2300 if (!isNullConstant(Idx)) { 2301 // Use a VL of 1 to avoid processing more elements than we need. 2302 SDValue VL = DAG.getConstant(1, DL, XLenVT); 2303 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2304 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2305 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 2306 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 2307 } 2308 2309 if (!EltVT.isInteger()) { 2310 // Floating-point extracts are handled in TableGen. 
2311 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 2312 DAG.getConstant(0, DL, XLenVT)); 2313 } 2314 2315 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 2316 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 2317 } 2318 2319 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 2320 SelectionDAG &DAG) const { 2321 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2322 SDLoc DL(Op); 2323 2324 if (Subtarget.hasStdExtV()) { 2325 // Some RVV intrinsics may claim that they want an integer operand to be 2326 // extended. 2327 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2328 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 2329 if (II->ExtendedOperand) { 2330 assert(II->ExtendedOperand < Op.getNumOperands()); 2331 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2332 SDValue &ScalarOp = Operands[II->ExtendedOperand]; 2333 EVT OpVT = ScalarOp.getValueType(); 2334 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 2335 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 2336 // If the operand is a constant, sign extend to increase our chances 2337 // of being able to use a .vi instruction. ANY_EXTEND would become a 2338 // a zero extend and the simm5 check in isel would fail. 2339 // FIXME: Should we ignore the upper bits in isel instead? 2340 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND 2341 : ISD::ANY_EXTEND; 2342 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 2343 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), 2344 Operands); 2345 } 2346 } 2347 } 2348 } 2349 2350 switch (IntNo) { 2351 default: 2352 return SDValue(); // Don't custom lower most intrinsics. 2353 case Intrinsic::thread_pointer: { 2354 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2355 return DAG.getRegister(RISCV::X4, PtrVT); 2356 } 2357 case Intrinsic::riscv_vmv_x_s: 2358 assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!"); 2359 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 2360 Op.getOperand(1)); 2361 case Intrinsic::riscv_vmv_v_x: { 2362 SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), 2363 Op.getOperand(1)); 2364 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), 2365 Scalar, Op.getOperand(2)); 2366 } 2367 case Intrinsic::riscv_vfmv_v_f: 2368 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 2369 Op.getOperand(1), Op.getOperand(2)); 2370 } 2371 } 2372 2373 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 2374 SelectionDAG &DAG) const { 2375 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2376 SDLoc DL(Op); 2377 2378 if (Subtarget.hasStdExtV()) { 2379 // Some RVV intrinsics may claim that they want an integer operand to be 2380 // extended. 2381 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2382 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 2383 if (II->ExtendedOperand) { 2384 // The operands start from the second argument in INTRINSIC_W_CHAIN. 2385 unsigned ExtendOp = II->ExtendedOperand + 1; 2386 assert(ExtendOp < Op.getNumOperands()); 2387 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 2388 SDValue &ScalarOp = Operands[ExtendOp]; 2389 EVT OpVT = ScalarOp.getValueType(); 2390 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 2391 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 2392 // If the operand is a constant, sign extend to increase our chances 2393 // of being able to use a .vi instruction. 
ANY_EXTEND would become a 2394 // a zero extend and the simm5 check in isel would fail. 2395 // FIXME: Should we ignore the upper bits in isel instead? 2396 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND 2397 : ISD::ANY_EXTEND; 2398 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 2399 return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(), 2400 Operands); 2401 } 2402 } 2403 } 2404 } 2405 2406 return SDValue(); // Don't custom lower most intrinsics. 2407 } 2408 2409 static MVT getLMUL1VT(MVT VT) { 2410 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 2411 "Unexpected vector MVT"); 2412 return MVT::getScalableVectorVT( 2413 VT.getVectorElementType(), 2414 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 2415 } 2416 2417 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 2418 switch (ISDOpcode) { 2419 default: 2420 llvm_unreachable("Unhandled reduction"); 2421 case ISD::VECREDUCE_ADD: 2422 return RISCVISD::VECREDUCE_ADD_VL; 2423 case ISD::VECREDUCE_UMAX: 2424 return RISCVISD::VECREDUCE_UMAX_VL; 2425 case ISD::VECREDUCE_SMAX: 2426 return RISCVISD::VECREDUCE_SMAX_VL; 2427 case ISD::VECREDUCE_UMIN: 2428 return RISCVISD::VECREDUCE_UMIN_VL; 2429 case ISD::VECREDUCE_SMIN: 2430 return RISCVISD::VECREDUCE_SMIN_VL; 2431 case ISD::VECREDUCE_AND: 2432 return RISCVISD::VECREDUCE_AND_VL; 2433 case ISD::VECREDUCE_OR: 2434 return RISCVISD::VECREDUCE_OR_VL; 2435 case ISD::VECREDUCE_XOR: 2436 return RISCVISD::VECREDUCE_XOR_VL; 2437 } 2438 } 2439 2440 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 2441 // reduction opcode. Note that this returns a vector type, which must be 2442 // further processed to access the scalar result in element 0. 2443 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 2444 SelectionDAG &DAG) const { 2445 SDLoc DL(Op); 2446 MVT VecVT = Op.getOperand(0).getSimpleValueType(); 2447 MVT VecEltVT = VecVT.getVectorElementType(); 2448 2449 // Avoid creating vectors with illegal type. 2450 if (!isTypeLegal(VecVT)) 2451 return SDValue(); 2452 2453 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 2454 2455 SDValue Vec = Op.getOperand(0); 2456 2457 MVT ContainerVT = VecVT; 2458 if (VecVT.isFixedLengthVector()) { 2459 ContainerVT = getContainerForFixedLengthVector(VecVT); 2460 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2461 } 2462 2463 MVT M1VT = getLMUL1VT(ContainerVT); 2464 2465 SDValue Mask, VL; 2466 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2467 2468 // FIXME: This is a VLMAX splat which might be too large and can prevent 2469 // vsetvli removal. 2470 SDValue NeutralElem = DAG.getNeutralElement( 2471 ISD::getVecReduceBaseOpcode(Op.getOpcode()), DL, VecEltVT, SDNodeFlags()); 2472 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 2473 SDValue Reduction = 2474 DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL); 2475 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2476 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2477 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 2478 } 2479 2480 // Given a reduction op, this function returns the matching reduction opcode, 2481 // the vector SDValue and the scalar SDValue required to lower this to a 2482 // RISCVISD node. 
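// For example (illustrative sketch): (vecreduce_fadd %v) yields
// {VECREDUCE_FADD_VL, %v, 0.0}, while the ordered
// (vecreduce_seq_fadd %start, %v) yields {VECREDUCE_SEQ_FADD_VL, %v, %start};
// the scalar operand is later splatted into an LMUL=1 register by the caller.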
2483 static std::tuple<unsigned, SDValue, SDValue> 2484 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 2485 SDLoc DL(Op); 2486 switch (Op.getOpcode()) { 2487 default: 2488 llvm_unreachable("Unhandled reduction"); 2489 case ISD::VECREDUCE_FADD: 2490 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 2491 DAG.getConstantFP(0.0, DL, EltVT)); 2492 case ISD::VECREDUCE_SEQ_FADD: 2493 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 2494 Op.getOperand(0)); 2495 } 2496 } 2497 2498 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 2499 SelectionDAG &DAG) const { 2500 SDLoc DL(Op); 2501 MVT VecEltVT = Op.getSimpleValueType(); 2502 2503 unsigned RVVOpcode; 2504 SDValue VectorVal, ScalarVal; 2505 std::tie(RVVOpcode, VectorVal, ScalarVal) = 2506 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2507 MVT VecVT = VectorVal.getSimpleValueType(); 2508 2509 MVT ContainerVT = VecVT; 2510 if (VecVT.isFixedLengthVector()) { 2511 ContainerVT = getContainerForFixedLengthVector(VecVT); 2512 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 2513 } 2514 2515 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 2516 2517 SDValue Mask, VL; 2518 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 2519 2520 // FIXME: This is a VLMAX splat which might be too large and can prevent 2521 // vsetvli removal. 2522 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2523 SDValue Reduction = 2524 DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL); 2525 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2526 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2527 } 2528 2529 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 2530 SelectionDAG &DAG) const { 2531 SDValue Vec = Op.getOperand(0); 2532 SDValue SubVec = Op.getOperand(1); 2533 MVT VecVT = Vec.getSimpleValueType(); 2534 MVT SubVecVT = SubVec.getSimpleValueType(); 2535 2536 SDLoc DL(Op); 2537 MVT XLenVT = Subtarget.getXLenVT(); 2538 unsigned OrigIdx = Op.getConstantOperandVal(2); 2539 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2540 2541 // We don't have the ability to slide mask vectors up indexed by their i1 2542 // elements; the smallest we can do is i8. Often we are able to bitcast to 2543 // equivalent i8 vectors. Note that when inserting a fixed-length vector 2544 // into a scalable one, we might not necessarily have enough scalable 2545 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 2546 if (SubVecVT.getVectorElementType() == MVT::i1 && 2547 (OrigIdx != 0 || !Vec.isUndef())) { 2548 if (VecVT.getVectorMinNumElements() >= 8 && 2549 SubVecVT.getVectorMinNumElements() >= 8) { 2550 assert(OrigIdx % 8 == 0 && "Invalid index"); 2551 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 2552 SubVecVT.getVectorMinNumElements() % 8 == 0 && 2553 "Unexpected mask vector lowering"); 2554 OrigIdx /= 8; 2555 SubVecVT = 2556 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 2557 SubVecVT.isScalableVector()); 2558 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 2559 VecVT.isScalableVector()); 2560 Vec = DAG.getBitcast(VecVT, Vec); 2561 SubVec = DAG.getBitcast(SubVecVT, SubVec); 2562 } else { 2563 // We can't slide this mask vector up indexed by its i1 elements. 2564 // This poses a problem when we wish to insert a scalable vector which 2565 // can't be re-expressed as a larger type. 
Just choose the slow path and
2566 // extend to a larger type, then truncate back down.
2567 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
2568 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
2569 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
2570 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
2571 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
2572 Op.getOperand(2));
2573 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
2574 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
2575 }
2576 }
2577
2578 // If the subvector is a fixed-length type, we cannot use subregister
2579 // manipulation to simplify the codegen; we don't know which register of a
2580 // LMUL group contains the specific subvector as we only know the minimum
2581 // register size. Therefore we must slide the vector group up the full
2582 // amount.
2583 if (SubVecVT.isFixedLengthVector()) {
2584 if (OrigIdx == 0 && Vec.isUndef())
2585 return Op;
2586 MVT ContainerVT = VecVT;
2587 if (VecVT.isFixedLengthVector()) {
2588 ContainerVT = getContainerForFixedLengthVector(VecVT);
2589 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2590 }
2591 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
2592 DAG.getUNDEF(ContainerVT), SubVec,
2593 DAG.getConstant(0, DL, XLenVT));
2594 SDValue Mask =
2595 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
2596 // Set the vector length to only the number of elements we care about. Note
2597 // that for slideup this includes the offset.
2598 SDValue VL =
2599 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
2600 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
2601 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
2602 SubVec, SlideupAmt, Mask, VL);
2603 if (!VecVT.isFixedLengthVector())
2604 return Slideup;
2605 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
2606 }
2607
2608 unsigned SubRegIdx, RemIdx;
2609 std::tie(SubRegIdx, RemIdx) =
2610 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2611 VecVT, SubVecVT, OrigIdx, TRI);
2612
2613 RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
2614 bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
2615 SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
2616 SubVecLMUL == RISCVVLMUL::LMUL_F8;
2617
2618 // 1. If the Idx has been completely eliminated and this subvector's size is
2619 // a vector register or a multiple thereof, or the surrounding elements are
2620 // undef, then this is a subvector insert which naturally aligns to a vector
2621 // register. These can easily be handled using subregister manipulation.
2622 // 2. If the subvector is smaller than a vector register, then the insertion
2623 // must preserve the undisturbed elements of the register. We do this by
2624 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
2625 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
2626 // subvector within the vector register, and an INSERT_SUBVECTOR of that
2627 // LMUL=1 type back into the larger vector (resolving to another subregister
2628 // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
2629 // to avoid allocating a large register group to hold our subvector.
2630 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 2631 return Op; 2632 2633 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 2634 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 2635 // (in our case undisturbed). This means we can set up a subvector insertion 2636 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 2637 // size of the subvector. 2638 MVT InterSubVT = VecVT; 2639 SDValue AlignedExtract = Vec; 2640 unsigned AlignedIdx = OrigIdx - RemIdx; 2641 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 2642 InterSubVT = getLMUL1VT(VecVT); 2643 // Extract a subvector equal to the nearest full vector register type. This 2644 // should resolve to a EXTRACT_SUBREG instruction. 2645 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 2646 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2647 } 2648 2649 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 2650 // For scalable vectors this must be further multiplied by vscale. 2651 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 2652 2653 SDValue Mask, VL; 2654 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2655 2656 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 2657 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 2658 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 2659 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 2660 2661 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 2662 DAG.getUNDEF(InterSubVT), SubVec, 2663 DAG.getConstant(0, DL, XLenVT)); 2664 2665 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 2666 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 2667 2668 // If required, insert this subvector back into the correct vector register. 2669 // This should resolve to an INSERT_SUBREG instruction. 2670 if (VecVT.bitsGT(InterSubVT)) 2671 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 2672 DAG.getConstant(AlignedIdx, DL, XLenVT)); 2673 2674 // We might have bitcast from a mask type: cast back to the original type if 2675 // required. 2676 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 2677 } 2678 2679 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 2680 SelectionDAG &DAG) const { 2681 SDValue Vec = Op.getOperand(0); 2682 MVT SubVecVT = Op.getSimpleValueType(); 2683 MVT VecVT = Vec.getSimpleValueType(); 2684 2685 SDLoc DL(Op); 2686 MVT XLenVT = Subtarget.getXLenVT(); 2687 unsigned OrigIdx = Op.getConstantOperandVal(1); 2688 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2689 2690 // We don't have the ability to slide mask vectors down indexed by their i1 2691 // elements; the smallest we can do is i8. Often we are able to bitcast to 2692 // equivalent i8 vectors. Note that when extracting a fixed-length vector 2693 // from a scalable one, we might not necessarily have enough scalable 2694 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 
2695 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
2696 if (VecVT.getVectorMinNumElements() >= 8 &&
2697 SubVecVT.getVectorMinNumElements() >= 8) {
2698 assert(OrigIdx % 8 == 0 && "Invalid index");
2699 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
2700 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
2701 "Unexpected mask vector lowering");
2702 OrigIdx /= 8;
2703 SubVecVT =
2704 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
2705 SubVecVT.isScalableVector());
2706 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
2707 VecVT.isScalableVector());
2708 Vec = DAG.getBitcast(VecVT, Vec);
2709 } else {
2710 // We can't slide this mask vector down indexed by its i1 elements.
2711 // This poses a problem when we wish to extract a scalable vector which
2712 // can't be re-expressed as a larger type. Just choose the slow path and
2713 // extend to a larger type, then truncate back down.
2714 // TODO: We could probably improve this when extracting certain fixed-length
2715 // subvectors from fixed-length vectors, where we can extract as i8 and shift
2716 // the correct element right to reach the desired subvector.
2717 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
2718 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
2719 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
2720 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
2721 Op.getOperand(1));
2722 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
2723 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
2724 }
2725 }
2726
2727 // If the subvector is a fixed-length type, we cannot use subregister
2728 // manipulation to simplify the codegen; we don't know which register of a
2729 // LMUL group contains the specific subvector as we only know the minimum
2730 // register size. Therefore we must slide the vector group down the full
2731 // amount.
2732 if (SubVecVT.isFixedLengthVector()) {
2733 // With an index of 0 this is a cast-like subvector extract, which can be
2734 // performed with subregister operations.
2735 if (OrigIdx == 0)
2736 return Op;
2737 MVT ContainerVT = VecVT;
2738 if (VecVT.isFixedLengthVector()) {
2739 ContainerVT = getContainerForFixedLengthVector(VecVT);
2740 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2741 }
2742 SDValue Mask =
2743 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
2744 // Set the vector length to only the number of elements we care about. This
2745 // avoids sliding down elements we're going to discard straight away.
2746 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
2747 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
2748 SDValue Slidedown =
2749 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2750 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
2751 // Now we can use a cast-like subvector extract to get the result.
2752 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2753 DAG.getConstant(0, DL, XLenVT));
2754 }
2755
2756 unsigned SubRegIdx, RemIdx;
2757 std::tie(SubRegIdx, RemIdx) =
2758 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2759 VecVT, SubVecVT, OrigIdx, TRI);
2760
2761 // If the Idx has been completely eliminated then this is a subvector extract
2762 // which naturally aligns to a vector register. These can easily be handled
2763 // using subregister manipulation.
2764 if (RemIdx == 0)
2765 return Op;
2766
2767 // Else we must shift our vector register directly to extract the subvector.
2768 // Do this using VSLIDEDOWN.
2769
2770 // If the vector type is an LMUL-group type, extract a subvector equal to the
2771 // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
2772 // instruction.
2773 MVT InterSubVT = VecVT;
2774 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
2775 InterSubVT = getLMUL1VT(VecVT);
2776 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
2777 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
2778 }
2779
2780 // Slide this vector register down by the desired number of elements in order
2781 // to place the desired subvector starting at element 0.
2782 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
2783 // For scalable vectors this must be further multiplied by vscale.
2784 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
2785
2786 SDValue Mask, VL;
2787 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
2788 SDValue Slidedown =
2789 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
2790 DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
2791
2792 // Now that the vector is in the right position, extract our final subvector.
2793 // This should resolve to a COPY.
2794 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
2795 DAG.getConstant(0, DL, XLenVT));
2796
2797 // We might have bitcast from a mask type: cast back to the original type if
2798 // required.
2799 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
2800 }
2801
2802 // Implement vector_reverse using vrgather.vv with indices determined by
2803 // subtracting the id of each element from (VLMAX-1). This will convert
2804 // the indices like so:
2805 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
2806 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
2807 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
2808 SelectionDAG &DAG) const {
2809 SDLoc DL(Op);
2810 MVT VecVT = Op.getSimpleValueType();
2811 unsigned EltSize = VecVT.getScalarSizeInBits();
2812 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2813
2814 unsigned MaxVLMAX = 0;
2815 unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
2816 if (VectorBitsMax != 0)
2817 MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
2818
2819 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
2820 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
2821
2822 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
2823 // to use vrgatherei16.vv.
2824 // TODO: It's also possible to use vrgatherei16.vv for other types to
2825 // decrease register width for the index calculation.
2826 if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
2827 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
2828 // Reverse each half, then reassemble them in reverse order.
2829 // NOTE: It's also possible that, after splitting, VLMAX no longer
2830 // requires vrgatherei16.vv.
2831 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
2832 SDValue Lo, Hi;
2833 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
2834 EVT LoVT, HiVT;
2835 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
2836 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
2837 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
2838 // Reassemble the low and high pieces reversed.
2839 // FIXME: This is a CONCAT_VECTORS.
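// (Descriptive note: the reversed high half is inserted at element 0 and the
// reversed low half after it, which yields the fully reversed vector.)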
2840 SDValue Res = 2841 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 2842 DAG.getIntPtrConstant(0, DL)); 2843 return DAG.getNode( 2844 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 2845 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 2846 } 2847 2848 // Just promote the int type to i16 which will double the LMUL. 2849 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 2850 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 2851 } 2852 2853 MVT XLenVT = Subtarget.getXLenVT(); 2854 SDValue Mask, VL; 2855 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 2856 2857 // Calculate VLMAX-1 for the desired SEW. 2858 unsigned MinElts = VecVT.getVectorMinNumElements(); 2859 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 2860 DAG.getConstant(MinElts, DL, XLenVT)); 2861 SDValue VLMinus1 = 2862 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 2863 2864 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 2865 bool IsRV32E64 = 2866 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 2867 SDValue SplatVL; 2868 if (!IsRV32E64) 2869 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 2870 else 2871 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 2872 2873 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 2874 SDValue Indices = 2875 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 2876 2877 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 2878 } 2879 2880 SDValue 2881 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 2882 SelectionDAG &DAG) const { 2883 auto *Load = cast<LoadSDNode>(Op); 2884 2885 SDLoc DL(Op); 2886 MVT VT = Op.getSimpleValueType(); 2887 MVT ContainerVT = getContainerForFixedLengthVector(VT); 2888 2889 SDValue VL = 2890 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2891 2892 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 2893 SDValue NewLoad = DAG.getMemIntrinsicNode( 2894 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 2895 Load->getMemoryVT(), Load->getMemOperand()); 2896 2897 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 2898 return DAG.getMergeValues({Result, Load->getChain()}, DL); 2899 } 2900 2901 SDValue 2902 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 2903 SelectionDAG &DAG) const { 2904 auto *Store = cast<StoreSDNode>(Op); 2905 2906 SDLoc DL(Op); 2907 MVT VT = Store->getValue().getSimpleValueType(); 2908 2909 // FIXME: We probably need to zero any extra bits in a byte for mask stores. 2910 // This is tricky to do. 
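  // Sketch of the lowering (illustrative): a fixed-length v4i32 store becomes
  // a VSE_VL node on the scalable container type with VL = 4, i.e.
  //   (RISCVISD::VSE_VL chain, (container value), baseptr, 4)
  // which is later selected to a vsetvli/vse sequence.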
2911 2912 MVT ContainerVT = getContainerForFixedLengthVector(VT); 2913 2914 SDValue VL = 2915 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2916 2917 SDValue NewValue = 2918 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 2919 return DAG.getMemIntrinsicNode( 2920 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 2921 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 2922 Store->getMemoryVT(), Store->getMemOperand()); 2923 } 2924 2925 SDValue 2926 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 2927 SelectionDAG &DAG) const { 2928 MVT InVT = Op.getOperand(0).getSimpleValueType(); 2929 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 2930 2931 MVT VT = Op.getSimpleValueType(); 2932 2933 SDValue Op1 = 2934 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 2935 SDValue Op2 = 2936 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 2937 2938 SDLoc DL(Op); 2939 SDValue VL = 2940 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2941 2942 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2943 2944 bool Invert = false; 2945 Optional<unsigned> LogicOpc; 2946 if (ContainerVT.isFloatingPoint()) { 2947 bool Swap = false; 2948 switch (CC) { 2949 default: 2950 break; 2951 case ISD::SETULE: 2952 case ISD::SETULT: 2953 Swap = true; 2954 LLVM_FALLTHROUGH; 2955 case ISD::SETUGE: 2956 case ISD::SETUGT: 2957 CC = getSetCCInverse(CC, ContainerVT); 2958 Invert = true; 2959 break; 2960 case ISD::SETOGE: 2961 case ISD::SETOGT: 2962 case ISD::SETGE: 2963 case ISD::SETGT: 2964 Swap = true; 2965 break; 2966 case ISD::SETUEQ: 2967 // Use !((OLT Op1, Op2) || (OLT Op2, Op1)) 2968 Invert = true; 2969 LogicOpc = RISCVISD::VMOR_VL; 2970 CC = ISD::SETOLT; 2971 break; 2972 case ISD::SETONE: 2973 // Use ((OLT Op1, Op2) || (OLT Op2, Op1)) 2974 LogicOpc = RISCVISD::VMOR_VL; 2975 CC = ISD::SETOLT; 2976 break; 2977 case ISD::SETO: 2978 // Use (OEQ Op1, Op1) && (OEQ Op2, Op2) 2979 LogicOpc = RISCVISD::VMAND_VL; 2980 CC = ISD::SETOEQ; 2981 break; 2982 case ISD::SETUO: 2983 // Use (UNE Op1, Op1) || (UNE Op2, Op2) 2984 LogicOpc = RISCVISD::VMOR_VL; 2985 CC = ISD::SETUNE; 2986 break; 2987 } 2988 2989 if (Swap) { 2990 CC = getSetCCSwappedOperands(CC); 2991 std::swap(Op1, Op2); 2992 } 2993 } 2994 2995 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2996 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2997 2998 // There are 3 cases we need to emit. 2999 // 1. For (OEQ Op1, Op1) && (OEQ Op2, Op2) or (UNE Op1, Op1) || (UNE Op2, Op2) 3000 // we need to compare each operand with itself. 3001 // 2. For (OLT Op1, Op2) || (OLT Op2, Op1) we need to compare Op1 and Op2 in 3002 // both orders. 3003 // 3. For any other case we just need one compare with Op1 and Op2. 
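  // For example (case 2, illustrative): SETONE was rewritten above to
  // LogicOpc = VMOR_VL with CC = SETOLT, so we emit
  //   t1 = SETCC_VL Op1, Op2, olt;  t2 = SETCC_VL Op2, Op1, olt;  vmor t1, t2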
3004 SDValue Cmp; 3005 if (LogicOpc && (CC == ISD::SETOEQ || CC == ISD::SETUNE)) { 3006 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op1, 3007 DAG.getCondCode(CC), Mask, VL); 3008 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op2, 3009 DAG.getCondCode(CC), Mask, VL); 3010 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3011 } else { 3012 Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 3013 DAG.getCondCode(CC), Mask, VL); 3014 if (LogicOpc) { 3015 SDValue Cmp2 = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op2, Op1, 3016 DAG.getCondCode(CC), Mask, VL); 3017 Cmp = DAG.getNode(*LogicOpc, DL, MaskVT, Cmp, Cmp2, VL); 3018 } 3019 } 3020 3021 if (Invert) { 3022 SDValue AllOnes = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3023 Cmp = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT, Cmp, AllOnes, VL); 3024 } 3025 3026 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 3027 } 3028 3029 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 3030 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 3031 MVT VT = Op.getSimpleValueType(); 3032 3033 if (VT.getVectorElementType() == MVT::i1) 3034 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 3035 3036 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 3037 } 3038 3039 // Lower vector ABS to smax(X, sub(0, X)). 3040 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 3041 SDLoc DL(Op); 3042 MVT VT = Op.getSimpleValueType(); 3043 SDValue X = Op.getOperand(0); 3044 3045 // For scalable vectors we just need to deal with i64 on RV32 since the 3046 // default expansion crashes in getConstant. 3047 if (VT.isScalableVector()) { 3048 assert(!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64 && 3049 "Unexpected custom lowering!"); 3050 SDValue SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, 3051 DAG.getConstant(0, DL, MVT::i32)); 3052 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, SplatZero, X); 3053 return DAG.getNode(ISD::SMAX, DL, VT, X, NegX); 3054 } 3055 3056 assert(VT.isFixedLengthVector() && "Unexpected type"); 3057 3058 MVT ContainerVT = 3059 RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget); 3060 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 3061 3062 SDValue Mask, VL; 3063 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3064 3065 SDValue SplatZero = 3066 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3067 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 3068 SDValue NegX = 3069 DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL); 3070 SDValue Max = 3071 DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL); 3072 3073 return convertFromScalableVector(VT, Max, DAG, Subtarget); 3074 } 3075 3076 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 3077 SDValue Op, SelectionDAG &DAG) const { 3078 MVT VT = Op.getSimpleValueType(); 3079 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3080 3081 MVT I1ContainerVT = 3082 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3083 3084 SDValue CC = 3085 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 3086 SDValue Op1 = 3087 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 3088 SDValue Op2 = 3089 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 3090 3091 SDLoc DL(Op); 3092 SDValue Mask, VL; 3093 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, 
                                       Subtarget);
3094
3095   SDValue Select =
3096       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3097
3098   return convertFromScalableVector(VT, Select, DAG, Subtarget);
3099 }
3100
3101 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3102                                                unsigned NewOpc,
3103                                                bool HasMask) const {
3104   MVT VT = Op.getSimpleValueType();
3105   assert(useRVVForFixedLengthVectorVT(VT) &&
3106          "Only expected to lower fixed length vector operation!");
3107   MVT ContainerVT = getContainerForFixedLengthVector(VT);
3108
3109   // Create list of operands by converting existing ones to scalable types.
3110   SmallVector<SDValue, 6> Ops;
3111   for (const SDValue &V : Op->op_values()) {
3112     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3113
3114     // Pass through non-vector operands.
3115     if (!V.getValueType().isVector()) {
3116       Ops.push_back(V);
3117       continue;
3118     }
3119
3120     // "cast" fixed length vector to a scalable vector.
3121     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3122            "Only fixed length vectors are supported!");
3123     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3124   }
3125
3126   SDLoc DL(Op);
3127   SDValue Mask, VL;
3128   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3129   if (HasMask)
3130     Ops.push_back(Mask);
3131   Ops.push_back(VL);
3132
3133   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3134   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3135 }
3136
3137 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3138 // form of the given Opcode.
3139 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
3140   switch (Opcode) {
3141   default:
3142     llvm_unreachable("Unexpected opcode");
3143   case ISD::SHL:
3144     return RISCVISD::SLLW;
3145   case ISD::SRA:
3146     return RISCVISD::SRAW;
3147   case ISD::SRL:
3148     return RISCVISD::SRLW;
3149   case ISD::SDIV:
3150     return RISCVISD::DIVW;
3151   case ISD::UDIV:
3152     return RISCVISD::DIVUW;
3153   case ISD::UREM:
3154     return RISCVISD::REMUW;
3155   case ISD::ROTL:
3156     return RISCVISD::ROLW;
3157   case ISD::ROTR:
3158     return RISCVISD::RORW;
3159   case RISCVISD::GREVI:
3160     return RISCVISD::GREVIW;
3161   case RISCVISD::GORCI:
3162     return RISCVISD::GORCIW;
3163   }
3164 }
3165
3166 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
3167 // Because i32 isn't a legal type for RV64, these operations would otherwise
3168 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
3169 // later on because the fact that the operation was originally of type i32 is
3170 // lost.
3171 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
3172                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
3173   SDLoc DL(N);
3174   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
3175   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3176   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3177   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3178   // ReplaceNodeResults requires we maintain the same type for the return value.
3179   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3180 }
3181
3182 // Converts the given 32-bit operation to an i64 operation with sign-extension
3183 // semantics, in order to reduce the number of sign-extension instructions.
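// For example (illustrative), an i32 (add x, y) on RV64 becomes
//   (trunc (sext_inreg (add (any_ext x), (any_ext y)), i32))
// which is expected to select to a single addw.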
3184 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 3185 SDLoc DL(N); 3186 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3187 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3188 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 3189 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 3190 DAG.getValueType(MVT::i32)); 3191 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 3192 } 3193 3194 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 3195 SmallVectorImpl<SDValue> &Results, 3196 SelectionDAG &DAG) const { 3197 SDLoc DL(N); 3198 switch (N->getOpcode()) { 3199 default: 3200 llvm_unreachable("Don't know how to custom type legalize this operation!"); 3201 case ISD::STRICT_FP_TO_SINT: 3202 case ISD::STRICT_FP_TO_UINT: 3203 case ISD::FP_TO_SINT: 3204 case ISD::FP_TO_UINT: { 3205 bool IsStrict = N->isStrictFPOpcode(); 3206 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3207 "Unexpected custom legalisation"); 3208 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 3209 // If the FP type needs to be softened, emit a library call using the 'si' 3210 // version. If we left it to default legalization we'd end up with 'di'. If 3211 // the FP type doesn't need to be softened just let generic type 3212 // legalization promote the result type. 3213 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 3214 TargetLowering::TypeSoftenFloat) 3215 return; 3216 RTLIB::Libcall LC; 3217 if (N->getOpcode() == ISD::FP_TO_SINT || 3218 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 3219 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 3220 else 3221 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 3222 MakeLibCallOptions CallOptions; 3223 EVT OpVT = Op0.getValueType(); 3224 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 3225 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 3226 SDValue Result; 3227 std::tie(Result, Chain) = 3228 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 3229 Results.push_back(Result); 3230 if (IsStrict) 3231 Results.push_back(Chain); 3232 break; 3233 } 3234 case ISD::READCYCLECOUNTER: { 3235 assert(!Subtarget.is64Bit() && 3236 "READCYCLECOUNTER only has custom type legalization on riscv32"); 3237 3238 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 3239 SDValue RCW = 3240 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 3241 3242 Results.push_back( 3243 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 3244 Results.push_back(RCW.getValue(2)); 3245 break; 3246 } 3247 case ISD::ADD: 3248 case ISD::SUB: 3249 case ISD::MUL: 3250 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3251 "Unexpected custom legalisation"); 3252 if (N->getOperand(1).getOpcode() == ISD::Constant) 3253 return; 3254 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 3255 break; 3256 case ISD::SHL: 3257 case ISD::SRA: 3258 case ISD::SRL: 3259 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3260 "Unexpected custom legalisation"); 3261 if (N->getOperand(1).getOpcode() == ISD::Constant) 3262 return; 3263 Results.push_back(customLegalizeToWOp(N, DAG)); 3264 break; 3265 case ISD::ROTL: 3266 case ISD::ROTR: 3267 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3268 "Unexpected custom legalisation"); 3269 Results.push_back(customLegalizeToWOp(N, DAG)); 3270 break; 3271 case ISD::SDIV: 3272 case ISD::UDIV: 3273 case ISD::UREM: { 3274 MVT VT = N->getSimpleValueType(0); 3275 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 3276 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 3277 "Unexpected custom legalisation"); 3278 if (N->getOperand(0).getOpcode() == ISD::Constant || 3279 N->getOperand(1).getOpcode() == ISD::Constant) 3280 return; 3281 3282 // If the input is i32, use ANY_EXTEND since the W instructions don't read 3283 // the upper 32 bits. For other types we need to sign or zero extend 3284 // based on the opcode. 3285 unsigned ExtOpc = ISD::ANY_EXTEND; 3286 if (VT != MVT::i32) 3287 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 3288 : ISD::ZERO_EXTEND; 3289 3290 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 3291 break; 3292 } 3293 case ISD::BITCAST: { 3294 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3295 Subtarget.hasStdExtF()) || 3296 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 3297 "Unexpected custom legalisation"); 3298 SDValue Op0 = N->getOperand(0); 3299 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 3300 if (Op0.getValueType() != MVT::f16) 3301 return; 3302 SDValue FPConv = 3303 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 3304 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 3305 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3306 Subtarget.hasStdExtF()) { 3307 if (Op0.getValueType() != MVT::f32) 3308 return; 3309 SDValue FPConv = 3310 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 3311 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 3312 } 3313 break; 3314 } 3315 case RISCVISD::GREVI: 3316 case RISCVISD::GORCI: { 3317 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3318 "Unexpected custom legalisation"); 3319 // This is similar to customLegalizeToWOp, except that we pass the second 3320 // operand (a TargetConstant) straight through: it is already of type 3321 // XLenVT. 3322 SDLoc DL(N); 3323 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 3324 SDValue NewOp0 = 3325 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3326 SDValue NewRes = 3327 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 3328 // ReplaceNodeResults requires we maintain the same type for the return 3329 // value. 3330 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3331 break; 3332 } 3333 case RISCVISD::SHFLI: { 3334 // There is no SHFLIW instruction, but we can just promote the operation. 3335 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3336 "Unexpected custom legalisation"); 3337 SDLoc DL(N); 3338 SDValue NewOp0 = 3339 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3340 SDValue NewRes = 3341 DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1)); 3342 // ReplaceNodeResults requires we maintain the same type for the return 3343 // value. 3344 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 3345 break; 3346 } 3347 case ISD::BSWAP: 3348 case ISD::BITREVERSE: { 3349 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3350 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 3351 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 3352 N->getOperand(0)); 3353 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 3354 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 3355 DAG.getTargetConstant(Imm, DL, 3356 Subtarget.getXLenVT())); 3357 // ReplaceNodeResults requires we maintain the same type for the return 3358 // value. 
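    // e.g. (illustrative) an i32 bswap X is legalized here as
    //   (trunc (GREVIW (any_ext X), 24))
    // and i32 bitreverse uses shift amount 31 instead.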
3359 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 3360 break; 3361 } 3362 case ISD::FSHL: 3363 case ISD::FSHR: { 3364 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 3365 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 3366 SDValue NewOp0 = 3367 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 3368 SDValue NewOp1 = 3369 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 3370 SDValue NewOp2 = 3371 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 3372 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 3373 // Mask the shift amount to 5 bits. 3374 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 3375 DAG.getConstant(0x1f, DL, MVT::i64)); 3376 unsigned Opc = 3377 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 3378 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 3379 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 3380 break; 3381 } 3382 case ISD::EXTRACT_VECTOR_ELT: { 3383 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 3384 // type is illegal (currently only vXi64 RV32). 3385 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 3386 // transferred to the destination register. We issue two of these from the 3387 // upper- and lower- halves of the SEW-bit vector element, slid down to the 3388 // first element. 3389 SDLoc DL(N); 3390 SDValue Vec = N->getOperand(0); 3391 SDValue Idx = N->getOperand(1); 3392 3393 // The vector type hasn't been legalized yet so we can't issue target 3394 // specific nodes if it needs legalization. 3395 // FIXME: We would manually legalize if it's important. 3396 if (!isTypeLegal(Vec.getValueType())) 3397 return; 3398 3399 MVT VecVT = Vec.getSimpleValueType(); 3400 3401 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 3402 VecVT.getVectorElementType() == MVT::i64 && 3403 "Unexpected EXTRACT_VECTOR_ELT legalization"); 3404 3405 // If this is a fixed vector, we need to convert it to a scalable vector. 3406 MVT ContainerVT = VecVT; 3407 if (VecVT.isFixedLengthVector()) { 3408 ContainerVT = getContainerForFixedLengthVector(VecVT); 3409 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3410 } 3411 3412 MVT XLenVT = Subtarget.getXLenVT(); 3413 3414 // Use a VL of 1 to avoid processing more elements than we need. 3415 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 3416 SDValue VL = DAG.getConstant(1, DL, XLenVT); 3417 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3418 3419 // Unless the index is known to be 0, we must slide the vector down to get 3420 // the desired element into index 0. 3421 if (!isNullConstant(Idx)) { 3422 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3423 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 3424 } 3425 3426 // Extract the lower XLEN bits of the correct vector element. 3427 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 3428 3429 // To extract the upper XLEN bits of the vector element, shift the first 3430 // element right by 32 bits and re-extract the lower XLEN bits. 
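    // Roughly (illustrative): splat the constant 32, shift the element right
    // with SRL_VL, then a second VMV_X_S reads the high half; the two halves
    // are combined with the BUILD_PAIR below.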
3431 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 3432 DAG.getConstant(32, DL, XLenVT), VL); 3433 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 3434 ThirtyTwoV, Mask, VL); 3435 3436 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 3437 3438 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 3439 break; 3440 } 3441 case ISD::INTRINSIC_WO_CHAIN: { 3442 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3443 switch (IntNo) { 3444 default: 3445 llvm_unreachable( 3446 "Don't know how to custom type legalize this intrinsic!"); 3447 case Intrinsic::riscv_vmv_x_s: { 3448 EVT VT = N->getValueType(0); 3449 assert((VT == MVT::i8 || VT == MVT::i16 || 3450 (Subtarget.is64Bit() && VT == MVT::i32)) && 3451 "Unexpected custom legalisation!"); 3452 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 3453 Subtarget.getXLenVT(), N->getOperand(1)); 3454 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 3455 break; 3456 } 3457 } 3458 break; 3459 } 3460 case ISD::VECREDUCE_ADD: 3461 case ISD::VECREDUCE_AND: 3462 case ISD::VECREDUCE_OR: 3463 case ISD::VECREDUCE_XOR: 3464 case ISD::VECREDUCE_SMAX: 3465 case ISD::VECREDUCE_UMAX: 3466 case ISD::VECREDUCE_SMIN: 3467 case ISD::VECREDUCE_UMIN: 3468 // The custom-lowering for these nodes returns a vector whose first element 3469 // is the result of the reduction. Extract its first element and let the 3470 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 3471 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 3472 Results.push_back(V); 3473 break; 3474 } 3475 } 3476 3477 // A structure to hold one of the bit-manipulation patterns below. Together, a 3478 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 3479 // (or (and (shl x, 1), 0xAAAAAAAA), 3480 // (and (srl x, 1), 0x55555555)) 3481 struct RISCVBitmanipPat { 3482 SDValue Op; 3483 unsigned ShAmt; 3484 bool IsSHL; 3485 3486 bool formsPairWith(const RISCVBitmanipPat &Other) const { 3487 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 3488 } 3489 }; 3490 3491 // Matches patterns of the form 3492 // (and (shl x, C2), (C1 << C2)) 3493 // (and (srl x, C2), C1) 3494 // (shl (and x, C1), C2) 3495 // (srl (and x, (C1 << C2)), C2) 3496 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 3497 // The expected masks for each shift amount are specified in BitmanipMasks where 3498 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 3499 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 3500 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 3501 // XLen is 64. 3502 static Optional<RISCVBitmanipPat> 3503 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 3504 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 3505 "Unexpected number of masks"); 3506 Optional<uint64_t> Mask; 3507 // Optionally consume a mask around the shift operation. 3508 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 3509 Mask = Op.getConstantOperandVal(1); 3510 Op = Op.getOperand(0); 3511 } 3512 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 3513 return None; 3514 bool IsSHL = Op.getOpcode() == ISD::SHL; 3515 3516 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3517 return None; 3518 uint64_t ShAmt = Op.getConstantOperandVal(1); 3519 3520 unsigned Width = Op.getValueType() == MVT::i64 ? 
64 : 32; 3521 if (ShAmt >= Width && !isPowerOf2_64(ShAmt)) 3522 return None; 3523 // If we don't have enough masks for 64 bit, then we must be trying to 3524 // match SHFL so we're only allowed to shift 1/4 of the width. 3525 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2)) 3526 return None; 3527 3528 SDValue Src = Op.getOperand(0); 3529 3530 // The expected mask is shifted left when the AND is found around SHL 3531 // patterns. 3532 // ((x >> 1) & 0x55555555) 3533 // ((x << 1) & 0xAAAAAAAA) 3534 bool SHLExpMask = IsSHL; 3535 3536 if (!Mask) { 3537 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 3538 // the mask is all ones: consume that now. 3539 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 3540 Mask = Src.getConstantOperandVal(1); 3541 Src = Src.getOperand(0); 3542 // The expected mask is now in fact shifted left for SRL, so reverse the 3543 // decision. 3544 // ((x & 0xAAAAAAAA) >> 1) 3545 // ((x & 0x55555555) << 1) 3546 SHLExpMask = !SHLExpMask; 3547 } else { 3548 // Use a default shifted mask of all-ones if there's no AND, truncated 3549 // down to the expected width. This simplifies the logic later on. 3550 Mask = maskTrailingOnes<uint64_t>(Width); 3551 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt); 3552 } 3553 } 3554 3555 unsigned MaskIdx = Log2_32(ShAmt); 3556 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 3557 3558 if (SHLExpMask) 3559 ExpMask <<= ShAmt; 3560 3561 if (Mask != ExpMask) 3562 return None; 3563 3564 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 3565 } 3566 3567 // Matches any of the following bit-manipulation patterns: 3568 // (and (shl x, 1), (0x55555555 << 1)) 3569 // (and (srl x, 1), 0x55555555) 3570 // (shl (and x, 0x55555555), 1) 3571 // (srl (and x, (0x55555555 << 1)), 1) 3572 // where the shift amount and mask may vary thus: 3573 // [1] = 0x55555555 / 0xAAAAAAAA 3574 // [2] = 0x33333333 / 0xCCCCCCCC 3575 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 3576 // [8] = 0x00FF00FF / 0xFF00FF00 3577 // [16] = 0x0000FFFF / 0xFFFFFFFF 3578 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 3579 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) { 3580 // These are the unshifted masks which we use to match bit-manipulation 3581 // patterns. They may be shifted left in certain circumstances. 3582 static const uint64_t BitmanipMasks[] = { 3583 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 3584 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 3585 3586 return matchRISCVBitmanipPat(Op, BitmanipMasks); 3587 } 3588 3589 // Match the following pattern as a GREVI(W) operation 3590 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 3591 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 3592 const RISCVSubtarget &Subtarget) { 3593 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3594 EVT VT = Op.getValueType(); 3595 3596 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 3597 auto LHS = matchGREVIPat(Op.getOperand(0)); 3598 auto RHS = matchGREVIPat(Op.getOperand(1)); 3599 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 3600 SDLoc DL(Op); 3601 return DAG.getNode( 3602 RISCVISD::GREVI, DL, VT, LHS->Op, 3603 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 3604 } 3605 } 3606 return SDValue(); 3607 } 3608 3609 // Matches any the following pattern as a GORCI(W) operation 3610 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 3611 // 2. 
(or x, (GREVI x, shamt)) if shamt is a power of 2 3612 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 3613 // Note that with the variant of 3., 3614 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 3615 // the inner pattern will first be matched as GREVI and then the outer 3616 // pattern will be matched to GORC via the first rule above. 3617 // 4. (or (rotl/rotr x, bitwidth/2), x) 3618 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 3619 const RISCVSubtarget &Subtarget) { 3620 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 3621 EVT VT = Op.getValueType(); 3622 3623 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 3624 SDLoc DL(Op); 3625 SDValue Op0 = Op.getOperand(0); 3626 SDValue Op1 = Op.getOperand(1); 3627 3628 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 3629 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 3630 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 3631 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 3632 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 3633 if ((Reverse.getOpcode() == ISD::ROTL || 3634 Reverse.getOpcode() == ISD::ROTR) && 3635 Reverse.getOperand(0) == X && 3636 isa<ConstantSDNode>(Reverse.getOperand(1))) { 3637 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 3638 if (RotAmt == (VT.getSizeInBits() / 2)) 3639 return DAG.getNode( 3640 RISCVISD::GORCI, DL, VT, X, 3641 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 3642 } 3643 return SDValue(); 3644 }; 3645 3646 // Check for either commutable permutation of (or (GREVI x, shamt), x) 3647 if (SDValue V = MatchOROfReverse(Op0, Op1)) 3648 return V; 3649 if (SDValue V = MatchOROfReverse(Op1, Op0)) 3650 return V; 3651 3652 // OR is commutable so canonicalize its OR operand to the left 3653 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 3654 std::swap(Op0, Op1); 3655 if (Op0.getOpcode() != ISD::OR) 3656 return SDValue(); 3657 SDValue OrOp0 = Op0.getOperand(0); 3658 SDValue OrOp1 = Op0.getOperand(1); 3659 auto LHS = matchGREVIPat(OrOp0); 3660 // OR is commutable so swap the operands and try again: x might have been 3661 // on the left 3662 if (!LHS) { 3663 std::swap(OrOp0, OrOp1); 3664 LHS = matchGREVIPat(OrOp0); 3665 } 3666 auto RHS = matchGREVIPat(Op1); 3667 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 3668 return DAG.getNode( 3669 RISCVISD::GORCI, DL, VT, LHS->Op, 3670 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 3671 } 3672 } 3673 return SDValue(); 3674 } 3675 3676 // Matches any of the following bit-manipulation patterns: 3677 // (and (shl x, 1), (0x22222222 << 1)) 3678 // (and (srl x, 1), 0x22222222) 3679 // (shl (and x, 0x22222222), 1) 3680 // (srl (and x, (0x22222222 << 1)), 1) 3681 // where the shift amount and mask may vary thus: 3682 // [1] = 0x22222222 / 0x44444444 3683 // [2] = 0x0C0C0C0C / 0x3C3C3C3C 3684 // [4] = 0x00F000F0 / 0x0F000F00 3685 // [8] = 0x0000FF00 / 0x00FF0000 3686 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64) 3687 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) { 3688 // These are the unshifted masks which we use to match bit-manipulation 3689 // patterns. They may be shifted left in certain circumstances. 
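  // For example (illustrative): with a shift amount of 4 the expected mask is
  // 0x00F000F0... (index Log2(4) = 2), shifted left by 4 when the AND wraps an
  // SHL rather than an SRL.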
3690   static const uint64_t BitmanipMasks[] = {
3691       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
3692       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
3693
3694   return matchRISCVBitmanipPat(Op, BitmanipMasks);
3695 }
3696
3697 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
3698 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
3699                                const RISCVSubtarget &Subtarget) {
3700   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
3701   EVT VT = Op.getValueType();
3702
3703   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
3704     return SDValue();
3705
3706   SDValue Op0 = Op.getOperand(0);
3707   SDValue Op1 = Op.getOperand(1);
3708
3709   // Or is commutable so canonicalize the second OR to the LHS.
3710   if (Op0.getOpcode() != ISD::OR)
3711     std::swap(Op0, Op1);
3712   if (Op0.getOpcode() != ISD::OR)
3713     return SDValue();
3714
3715   // We found an inner OR, so our operands are the operands of the inner OR
3716   // and the other operand of the outer OR.
3717   SDValue A = Op0.getOperand(0);
3718   SDValue B = Op0.getOperand(1);
3719   SDValue C = Op1;
3720
3721   auto Match1 = matchSHFLPat(A);
3722   auto Match2 = matchSHFLPat(B);
3723
3724   // If neither matched, we failed.
3725   if (!Match1 && !Match2)
3726     return SDValue();
3727
3728   // We had at least one match. If one failed, try the remaining C operand.
3729   if (!Match1) {
3730     std::swap(A, C);
3731     Match1 = matchSHFLPat(A);
3732     if (!Match1)
3733       return SDValue();
3734   } else if (!Match2) {
3735     std::swap(B, C);
3736     Match2 = matchSHFLPat(B);
3737     if (!Match2)
3738       return SDValue();
3739   }
3740   assert(Match1 && Match2);
3741
3742   // Make sure our matches pair up.
3743   if (!Match1->formsPairWith(*Match2))
3744     return SDValue();
3745
3746   // All that remains is to make sure C is an AND with the same input, that
3747   // masks out the bits that are being shuffled.
3748   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
3749       C.getOperand(0) != Match1->Op)
3750     return SDValue();
3751
3752   uint64_t Mask = C.getConstantOperandVal(1);
3753
3754   static const uint64_t BitmanipMasks[] = {
3755       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
3756       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
3757   };
3758
3759   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
3760   unsigned MaskIdx = Log2_32(Match1->ShAmt);
3761   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
3762
3763   if (Mask != ExpMask)
3764     return SDValue();
3765
3766   SDLoc DL(Op);
3767   return DAG.getNode(
3768       RISCVISD::SHFLI, DL, VT, Match1->Op,
3769       DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
3770 }
3771
3772 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
3773 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
3774 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated stage does
3775 // not undo itself, but it is redundant.
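// For example (illustrative): (GREVI (GREVI x, 2), 2) folds to x, while
// (GORCI (GORCI x, 1), 2) folds to (GORCI x, 3).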
3776 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 3777 unsigned ShAmt1 = N->getConstantOperandVal(1); 3778 SDValue Src = N->getOperand(0); 3779 3780 if (Src.getOpcode() != N->getOpcode()) 3781 return SDValue(); 3782 3783 unsigned ShAmt2 = Src.getConstantOperandVal(1); 3784 Src = Src.getOperand(0); 3785 3786 unsigned CombinedShAmt; 3787 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 3788 CombinedShAmt = ShAmt1 | ShAmt2; 3789 else 3790 CombinedShAmt = ShAmt1 ^ ShAmt2; 3791 3792 if (CombinedShAmt == 0) 3793 return Src; 3794 3795 SDLoc DL(N); 3796 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 3797 DAG.getTargetConstant(CombinedShAmt, DL, 3798 N->getOperand(1).getValueType())); 3799 } 3800 3801 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 3802 DAGCombinerInfo &DCI) const { 3803 SelectionDAG &DAG = DCI.DAG; 3804 3805 switch (N->getOpcode()) { 3806 default: 3807 break; 3808 case RISCVISD::SplitF64: { 3809 SDValue Op0 = N->getOperand(0); 3810 // If the input to SplitF64 is just BuildPairF64 then the operation is 3811 // redundant. Instead, use BuildPairF64's operands directly. 3812 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 3813 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 3814 3815 SDLoc DL(N); 3816 3817 // It's cheaper to materialise two 32-bit integers than to load a double 3818 // from the constant pool and transfer it to integer registers through the 3819 // stack. 3820 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 3821 APInt V = C->getValueAPF().bitcastToAPInt(); 3822 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 3823 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 3824 return DCI.CombineTo(N, Lo, Hi); 3825 } 3826 3827 // This is a target-specific version of a DAGCombine performed in 3828 // DAGCombiner::visitBITCAST. It performs the equivalent of: 3829 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 3830 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 3831 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 3832 !Op0.getNode()->hasOneUse()) 3833 break; 3834 SDValue NewSplitF64 = 3835 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 3836 Op0.getOperand(0)); 3837 SDValue Lo = NewSplitF64.getValue(0); 3838 SDValue Hi = NewSplitF64.getValue(1); 3839 APInt SignBit = APInt::getSignMask(32); 3840 if (Op0.getOpcode() == ISD::FNEG) { 3841 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 3842 DAG.getConstant(SignBit, DL, MVT::i32)); 3843 return DCI.CombineTo(N, Lo, NewHi); 3844 } 3845 assert(Op0.getOpcode() == ISD::FABS); 3846 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 3847 DAG.getConstant(~SignBit, DL, MVT::i32)); 3848 return DCI.CombineTo(N, Lo, NewHi); 3849 } 3850 case RISCVISD::SLLW: 3851 case RISCVISD::SRAW: 3852 case RISCVISD::SRLW: 3853 case RISCVISD::ROLW: 3854 case RISCVISD::RORW: { 3855 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 
3856 SDValue LHS = N->getOperand(0); 3857 SDValue RHS = N->getOperand(1); 3858 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 3859 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 3860 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 3861 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 3862 if (N->getOpcode() != ISD::DELETED_NODE) 3863 DCI.AddToWorklist(N); 3864 return SDValue(N, 0); 3865 } 3866 break; 3867 } 3868 case RISCVISD::FSL: 3869 case RISCVISD::FSR: { 3870 // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. 3871 SDValue ShAmt = N->getOperand(2); 3872 unsigned BitWidth = ShAmt.getValueSizeInBits(); 3873 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 3874 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1); 3875 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 3876 if (N->getOpcode() != ISD::DELETED_NODE) 3877 DCI.AddToWorklist(N); 3878 return SDValue(N, 0); 3879 } 3880 break; 3881 } 3882 case RISCVISD::FSLW: 3883 case RISCVISD::FSRW: { 3884 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 3885 // read. 3886 SDValue Op0 = N->getOperand(0); 3887 SDValue Op1 = N->getOperand(1); 3888 SDValue ShAmt = N->getOperand(2); 3889 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 3890 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 3891 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 3892 SimplifyDemandedBits(Op1, OpMask, DCI) || 3893 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 3894 if (N->getOpcode() != ISD::DELETED_NODE) 3895 DCI.AddToWorklist(N); 3896 return SDValue(N, 0); 3897 } 3898 break; 3899 } 3900 case RISCVISD::GREVIW: 3901 case RISCVISD::GORCIW: { 3902 // Only the lower 32 bits of the first operand are read 3903 SDValue Op0 = N->getOperand(0); 3904 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 3905 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 3906 if (N->getOpcode() != ISD::DELETED_NODE) 3907 DCI.AddToWorklist(N); 3908 return SDValue(N, 0); 3909 } 3910 3911 return combineGREVI_GORCI(N, DCI.DAG); 3912 } 3913 case RISCVISD::FMV_X_ANYEXTW_RV64: { 3914 SDLoc DL(N); 3915 SDValue Op0 = N->getOperand(0); 3916 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 3917 // conversion is unnecessary and can be replaced with an ANY_EXTEND 3918 // of the FMV_W_X_RV64 operand. 3919 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 3920 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 3921 "Unexpected value type!"); 3922 return Op0.getOperand(0); 3923 } 3924 3925 // This is a target-specific version of a DAGCombine performed in 3926 // DAGCombiner::visitBITCAST. 
It performs the equivalent of: 3927 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 3928 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 3929 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 3930 !Op0.getNode()->hasOneUse()) 3931 break; 3932 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 3933 Op0.getOperand(0)); 3934 APInt SignBit = APInt::getSignMask(32).sext(64); 3935 if (Op0.getOpcode() == ISD::FNEG) 3936 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 3937 DAG.getConstant(SignBit, DL, MVT::i64)); 3938 3939 assert(Op0.getOpcode() == ISD::FABS); 3940 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 3941 DAG.getConstant(~SignBit, DL, MVT::i64)); 3942 } 3943 case RISCVISD::GREVI: 3944 case RISCVISD::GORCI: 3945 return combineGREVI_GORCI(N, DCI.DAG); 3946 case ISD::OR: 3947 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 3948 return GREV; 3949 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 3950 return GORC; 3951 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget)) 3952 return SHFL; 3953 break; 3954 case RISCVISD::SELECT_CC: { 3955 // Transform 3956 SDValue LHS = N->getOperand(0); 3957 SDValue RHS = N->getOperand(1); 3958 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 3959 if (!ISD::isIntEqualitySetCC(CCVal)) 3960 break; 3961 3962 // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> 3963 // (select_cc X, Y, lt, trueV, falseV) 3964 // Sometimes the setcc is introduced after select_cc has been formed. 3965 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 3966 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 3967 // If we're looking for eq 0 instead of ne 0, we need to invert the 3968 // condition. 3969 bool Invert = CCVal == ISD::SETEQ; 3970 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 3971 if (Invert) 3972 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 3973 3974 RHS = LHS.getOperand(1); 3975 LHS = LHS.getOperand(0); 3976 normaliseSetCC(LHS, RHS, CCVal); 3977 3978 SDLoc DL(N); 3979 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 3980 return DAG.getNode( 3981 RISCVISD::SELECT_CC, DL, N->getValueType(0), 3982 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 3983 } 3984 3985 // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> 3986 // (select_cc X, Y, eq/ne, trueV, falseV) 3987 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 3988 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), 3989 {LHS.getOperand(0), LHS.getOperand(1), 3990 N->getOperand(2), N->getOperand(3), 3991 N->getOperand(4)}); 3992 // (select_cc X, 1, setne, trueV, falseV) -> 3993 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 3994 // This can occur when legalizing some floating point comparisons. 3995 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 3996 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 3997 SDLoc DL(N); 3998 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 3999 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 4000 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 4001 return DAG.getNode( 4002 RISCVISD::SELECT_CC, DL, N->getValueType(0), 4003 {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)}); 4004 } 4005 4006 break; 4007 } 4008 case ISD::SETCC: { 4009 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 
4010 // Comparing with 0 may allow us to fold into bnez/beqz. 4011 SDValue LHS = N->getOperand(0); 4012 SDValue RHS = N->getOperand(1); 4013 if (LHS.getValueType().isScalableVector()) 4014 break; 4015 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 4016 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 4017 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 4018 DAG.MaskedValueIsZero(LHS, Mask)) { 4019 SDLoc DL(N); 4020 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 4021 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 4022 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 4023 } 4024 break; 4025 } 4026 case ISD::FCOPYSIGN: { 4027 EVT VT = N->getValueType(0); 4028 if (!VT.isVector()) 4029 break; 4030 // There is a form of VFSGNJ which injects the negated sign of its second 4031 // operand. Try and bubble any FNEG up after the extend/round to produce 4032 // this optimized pattern. Avoid modifying cases where FP_ROUND and 4033 // TRUNC=1. 4034 SDValue In2 = N->getOperand(1); 4035 // Avoid cases where the extend/round has multiple uses, as duplicating 4036 // those is typically more expensive than removing a fneg. 4037 if (!In2.hasOneUse()) 4038 break; 4039 if (In2.getOpcode() != ISD::FP_EXTEND && 4040 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 4041 break; 4042 In2 = In2.getOperand(0); 4043 if (In2.getOpcode() != ISD::FNEG) 4044 break; 4045 SDLoc DL(N); 4046 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 4047 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 4048 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 4049 } 4050 } 4051 4052 return SDValue(); 4053 } 4054 4055 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 4056 const SDNode *N, CombineLevel Level) const { 4057 // The following folds are only desirable if `(OP _, c1 << c2)` can be 4058 // materialised in fewer instructions than `(OP _, c1)`: 4059 // 4060 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4061 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 4062 SDValue N0 = N->getOperand(0); 4063 EVT Ty = N0.getValueType(); 4064 if (Ty.isScalarInteger() && 4065 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 4066 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 4067 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4068 if (C1 && C2) { 4069 const APInt &C1Int = C1->getAPIntValue(); 4070 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 4071 4072 // We can materialise `c1 << c2` into an add immediate, so it's "free", 4073 // and the combine should happen, to potentially allow further combines 4074 // later. 4075 if (ShiftedC1Int.getMinSignedBits() <= 64 && 4076 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 4077 return true; 4078 4079 // We can materialise `c1` in an add immediate, so it's "free", and the 4080 // combine should be prevented. 4081 if (C1Int.getMinSignedBits() <= 64 && 4082 isLegalAddImmediate(C1Int.getSExtValue())) 4083 return false; 4084 4085 // Neither constant will fit into an immediate, so find materialisation 4086 // costs. 4087 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 4088 Subtarget.is64Bit()); 4089 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 4090 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 4091 4092 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 4093 // combine should be prevented. 
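      // For example (illustrative): with c1 = 1, c2 = 3 the combine was
      // already allowed above because 1 << 3 = 8 fits an ADDI immediate; the
      // cost comparison below only matters for constants that need
      // multi-instruction materialisation.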
4094 if (C1Cost < ShiftedC1Cost) 4095 return false; 4096 } 4097 } 4098 return true; 4099 } 4100 4101 bool RISCVTargetLowering::targetShrinkDemandedConstant( 4102 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 4103 TargetLoweringOpt &TLO) const { 4104 // Delay this optimization as late as possible. 4105 if (!TLO.LegalOps) 4106 return false; 4107 4108 EVT VT = Op.getValueType(); 4109 if (VT.isVector()) 4110 return false; 4111 4112 // Only handle AND for now. 4113 if (Op.getOpcode() != ISD::AND) 4114 return false; 4115 4116 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4117 if (!C) 4118 return false; 4119 4120 const APInt &Mask = C->getAPIntValue(); 4121 4122 // Clear all non-demanded bits initially. 4123 APInt ShrunkMask = Mask & DemandedBits; 4124 4125 // If the shrunk mask fits in sign extended 12 bits, let the target 4126 // independent code apply it. 4127 if (ShrunkMask.isSignedIntN(12)) 4128 return false; 4129 4130 // Try to make a smaller immediate by setting undemanded bits. 4131 4132 // We need to be able to make a negative number through a combination of mask 4133 // and undemanded bits. 4134 APInt ExpandedMask = Mask | ~DemandedBits; 4135 if (!ExpandedMask.isNegative()) 4136 return false; 4137 4138 // What is the fewest number of bits we need to represent the negative number. 4139 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 4140 4141 // Try to make a 12 bit negative immediate. If that fails try to make a 32 4142 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 4143 APInt NewMask = ShrunkMask; 4144 if (MinSignedBits <= 12) 4145 NewMask.setBitsFrom(11); 4146 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 4147 NewMask.setBitsFrom(31); 4148 else 4149 return false; 4150 4151 // Sanity check that our new mask is a subset of the demanded mask. 4152 assert(NewMask.isSubsetOf(ExpandedMask)); 4153 4154 // If we aren't changing the mask, just return true to keep it and prevent 4155 // the caller from optimizing. 4156 if (NewMask == Mask) 4157 return true; 4158 4159 // Replace the constant with the new mask. 4160 SDLoc DL(Op); 4161 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 4162 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 4163 return TLO.CombineTo(Op, NewOp); 4164 } 4165 4166 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 4167 KnownBits &Known, 4168 const APInt &DemandedElts, 4169 const SelectionDAG &DAG, 4170 unsigned Depth) const { 4171 unsigned BitWidth = Known.getBitWidth(); 4172 unsigned Opc = Op.getOpcode(); 4173 assert((Opc >= ISD::BUILTIN_OP_END || 4174 Opc == ISD::INTRINSIC_WO_CHAIN || 4175 Opc == ISD::INTRINSIC_W_CHAIN || 4176 Opc == ISD::INTRINSIC_VOID) && 4177 "Should use MaskedValueIsZero if you don't know whether Op" 4178 " is a target node!"); 4179 4180 Known.resetAll(); 4181 switch (Opc) { 4182 default: break; 4183 case RISCVISD::REMUW: { 4184 KnownBits Known2; 4185 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4186 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4187 // We only care about the lower 32 bits. 4188 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 4189 // Restore the original width by sign extending. 
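    // (remuw on RV64 sign-extends its 32-bit result to XLEN, hence the sext
    // rather than a zext here.)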
4190 Known = Known.sext(BitWidth); 4191 break; 4192 } 4193 case RISCVISD::DIVUW: { 4194 KnownBits Known2; 4195 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4196 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4197 // We only care about the lower 32 bits. 4198 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 4199 // Restore the original width by sign extending. 4200 Known = Known.sext(BitWidth); 4201 break; 4202 } 4203 case RISCVISD::READ_VLENB: 4204 // We assume VLENB is at least 8 bytes. 4205 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 4206 Known.Zero.setLowBits(3); 4207 break; 4208 } 4209 } 4210 4211 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 4212 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 4213 unsigned Depth) const { 4214 switch (Op.getOpcode()) { 4215 default: 4216 break; 4217 case RISCVISD::SLLW: 4218 case RISCVISD::SRAW: 4219 case RISCVISD::SRLW: 4220 case RISCVISD::DIVW: 4221 case RISCVISD::DIVUW: 4222 case RISCVISD::REMUW: 4223 case RISCVISD::ROLW: 4224 case RISCVISD::RORW: 4225 case RISCVISD::GREVIW: 4226 case RISCVISD::GORCIW: 4227 case RISCVISD::FSLW: 4228 case RISCVISD::FSRW: 4229 // TODO: As the result is sign-extended, this is conservatively correct. A 4230 // more precise answer could be calculated for SRAW depending on known 4231 // bits in the shift amount. 4232 return 33; 4233 case RISCVISD::SHFLI: { 4234 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 4235 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 4236 // will stay within the upper 32 bits. If there were more than 32 sign bits 4237 // before there will be at least 33 sign bits after. 4238 if (Op.getValueType() == MVT::i64 && 4239 (Op.getConstantOperandVal(1) & 0x10) == 0) { 4240 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4241 if (Tmp > 32) 4242 return 33; 4243 } 4244 break; 4245 } 4246 case RISCVISD::VMV_X_S: 4247 // The number of sign bits of the scalar result is computed by obtaining the 4248 // element type of the input vector operand, subtracting its width from the 4249 // XLEN, and then adding one (sign bit within the element type). If the 4250 // element type is wider than XLen, the least-significant XLEN bits are 4251 // taken. 4252 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 4253 return 1; 4254 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 4255 } 4256 4257 return 1; 4258 } 4259 4260 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 4261 MachineBasicBlock *BB) { 4262 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 4263 4264 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 4265 // Should the count have wrapped while it was being read, we need to try 4266 // again. 4267 // ... 4268 // read: 4269 // rdcycleh x3 # load high word of cycle 4270 // rdcycle x2 # load low word of cycle 4271 // rdcycleh x4 # load high word of cycle 4272 // bne x3, x4, read # check if high word reads match, otherwise try again 4273 // ... 
4274 4275 MachineFunction &MF = *BB->getParent(); 4276 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4277 MachineFunction::iterator It = ++BB->getIterator(); 4278 4279 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4280 MF.insert(It, LoopMBB); 4281 4282 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 4283 MF.insert(It, DoneMBB); 4284 4285 // Transfer the remainder of BB and its successor edges to DoneMBB. 4286 DoneMBB->splice(DoneMBB->begin(), BB, 4287 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 4288 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 4289 4290 BB->addSuccessor(LoopMBB); 4291 4292 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4293 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 4294 Register LoReg = MI.getOperand(0).getReg(); 4295 Register HiReg = MI.getOperand(1).getReg(); 4296 DebugLoc DL = MI.getDebugLoc(); 4297 4298 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4299 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 4300 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4301 .addReg(RISCV::X0); 4302 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 4303 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 4304 .addReg(RISCV::X0); 4305 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 4306 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 4307 .addReg(RISCV::X0); 4308 4309 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 4310 .addReg(HiReg) 4311 .addReg(ReadAgainReg) 4312 .addMBB(LoopMBB); 4313 4314 LoopMBB->addSuccessor(LoopMBB); 4315 LoopMBB->addSuccessor(DoneMBB); 4316 4317 MI.eraseFromParent(); 4318 4319 return DoneMBB; 4320 } 4321 4322 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 4323 MachineBasicBlock *BB) { 4324 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 4325 4326 MachineFunction &MF = *BB->getParent(); 4327 DebugLoc DL = MI.getDebugLoc(); 4328 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4329 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4330 Register LoReg = MI.getOperand(0).getReg(); 4331 Register HiReg = MI.getOperand(1).getReg(); 4332 Register SrcReg = MI.getOperand(2).getReg(); 4333 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 4334 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4335 4336 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 4337 RI); 4338 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4339 MachineMemOperand *MMOLo = 4340 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 4341 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4342 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 4343 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 4344 .addFrameIndex(FI) 4345 .addImm(0) 4346 .addMemOperand(MMOLo); 4347 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 4348 .addFrameIndex(FI) 4349 .addImm(4) 4350 .addMemOperand(MMOHi); 4351 MI.eraseFromParent(); // The pseudo instruction is gone now. 
4352 return BB; 4353 } 4354 4355 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 4356 MachineBasicBlock *BB) { 4357 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 4358 "Unexpected instruction"); 4359 4360 MachineFunction &MF = *BB->getParent(); 4361 DebugLoc DL = MI.getDebugLoc(); 4362 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4363 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 4364 Register DstReg = MI.getOperand(0).getReg(); 4365 Register LoReg = MI.getOperand(1).getReg(); 4366 Register HiReg = MI.getOperand(2).getReg(); 4367 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 4368 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 4369 4370 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 4371 MachineMemOperand *MMOLo = 4372 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 4373 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 4374 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 4375 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4376 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 4377 .addFrameIndex(FI) 4378 .addImm(0) 4379 .addMemOperand(MMOLo); 4380 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 4381 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 4382 .addFrameIndex(FI) 4383 .addImm(4) 4384 .addMemOperand(MMOHi); 4385 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 4386 MI.eraseFromParent(); // The pseudo instruction is gone now. 4387 return BB; 4388 } 4389 4390 static bool isSelectPseudo(MachineInstr &MI) { 4391 switch (MI.getOpcode()) { 4392 default: 4393 return false; 4394 case RISCV::Select_GPR_Using_CC_GPR: 4395 case RISCV::Select_FPR16_Using_CC_GPR: 4396 case RISCV::Select_FPR32_Using_CC_GPR: 4397 case RISCV::Select_FPR64_Using_CC_GPR: 4398 return true; 4399 } 4400 } 4401 4402 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 4403 MachineBasicBlock *BB) { 4404 // To "insert" Select_* instructions, we actually have to insert the triangle 4405 // control-flow pattern. The incoming instructions know the destination vreg 4406 // to set, the condition code register to branch on, the true/false values to 4407 // select between, and the condcode to use to select the appropriate branch. 4408 // 4409 // We produce the following control flow: 4410 // HeadMBB 4411 // | \ 4412 // | IfFalseMBB 4413 // | / 4414 // TailMBB 4415 // 4416 // When we find a sequence of selects we attempt to optimize their emission 4417 // by sharing the control flow. Currently we only handle cases where we have 4418 // multiple selects with the exact same condition (same LHS, RHS and CC). 4419 // The selects may be interleaved with other instructions if the other 4420 // instructions meet some requirements we deem safe: 4421 // - They are debug instructions. Otherwise, 4422 // - They do not have side-effects, do not access memory and their inputs do 4423 // not depend on the results of the select pseudo-instructions. 4424 // The TrueV/FalseV operands of the selects cannot depend on the result of 4425 // previous selects in the sequence. 4426 // These conditions could be further relaxed. See the X86 target for a 4427 // related approach and more information. 
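  // As a purely illustrative example (operand order: dest, LHS, RHS, CC,
  // TrueV, FalseV), a sequence such as
  //   %x = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %a, %b
  //   %y = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %c, %d
  // shares a single conditional branch in HeadMBB and becomes two PHIs in
  // TailMBB rather than two separate triangles.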
4428 Register LHS = MI.getOperand(1).getReg(); 4429 Register RHS = MI.getOperand(2).getReg(); 4430 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 4431 4432 SmallVector<MachineInstr *, 4> SelectDebugValues; 4433 SmallSet<Register, 4> SelectDests; 4434 SelectDests.insert(MI.getOperand(0).getReg()); 4435 4436 MachineInstr *LastSelectPseudo = &MI; 4437 4438 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 4439 SequenceMBBI != E; ++SequenceMBBI) { 4440 if (SequenceMBBI->isDebugInstr()) 4441 continue; 4442 else if (isSelectPseudo(*SequenceMBBI)) { 4443 if (SequenceMBBI->getOperand(1).getReg() != LHS || 4444 SequenceMBBI->getOperand(2).getReg() != RHS || 4445 SequenceMBBI->getOperand(3).getImm() != CC || 4446 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 4447 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 4448 break; 4449 LastSelectPseudo = &*SequenceMBBI; 4450 SequenceMBBI->collectDebugValues(SelectDebugValues); 4451 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 4452 } else { 4453 if (SequenceMBBI->hasUnmodeledSideEffects() || 4454 SequenceMBBI->mayLoadOrStore()) 4455 break; 4456 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 4457 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 4458 })) 4459 break; 4460 } 4461 } 4462 4463 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 4464 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4465 DebugLoc DL = MI.getDebugLoc(); 4466 MachineFunction::iterator I = ++BB->getIterator(); 4467 4468 MachineBasicBlock *HeadMBB = BB; 4469 MachineFunction *F = BB->getParent(); 4470 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 4471 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 4472 4473 F->insert(I, IfFalseMBB); 4474 F->insert(I, TailMBB); 4475 4476 // Transfer debug instructions associated with the selects to TailMBB. 4477 for (MachineInstr *DebugInstr : SelectDebugValues) { 4478 TailMBB->push_back(DebugInstr->removeFromParent()); 4479 } 4480 4481 // Move all instructions after the sequence to TailMBB. 4482 TailMBB->splice(TailMBB->end(), HeadMBB, 4483 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 4484 // Update machine-CFG edges by transferring all successors of the current 4485 // block to the new block which will contain the Phi nodes for the selects. 4486 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 4487 // Set the successors for HeadMBB. 4488 HeadMBB->addSuccessor(IfFalseMBB); 4489 HeadMBB->addSuccessor(TailMBB); 4490 4491 // Insert appropriate branch. 4492 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 4493 4494 BuildMI(HeadMBB, DL, TII.get(Opcode)) 4495 .addReg(LHS) 4496 .addReg(RHS) 4497 .addMBB(TailMBB); 4498 4499 // IfFalseMBB just falls through to TailMBB. 4500 IfFalseMBB->addSuccessor(TailMBB); 4501 4502 // Create PHIs for all of the select pseudo-instructions. 
4503 auto SelectMBBI = MI.getIterator(); 4504 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 4505 auto InsertionPoint = TailMBB->begin(); 4506 while (SelectMBBI != SelectEnd) { 4507 auto Next = std::next(SelectMBBI); 4508 if (isSelectPseudo(*SelectMBBI)) { 4509 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 4510 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 4511 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 4512 .addReg(SelectMBBI->getOperand(4).getReg()) 4513 .addMBB(HeadMBB) 4514 .addReg(SelectMBBI->getOperand(5).getReg()) 4515 .addMBB(IfFalseMBB); 4516 SelectMBBI->eraseFromParent(); 4517 } 4518 SelectMBBI = Next; 4519 } 4520 4521 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 4522 return TailMBB; 4523 } 4524 4525 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 4526 int VLIndex, unsigned SEWIndex, 4527 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 4528 MachineFunction &MF = *BB->getParent(); 4529 DebugLoc DL = MI.getDebugLoc(); 4530 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 4531 4532 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 4533 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 4534 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 4535 4536 MachineRegisterInfo &MRI = MF.getRegInfo(); 4537 4538 auto BuildVSETVLI = [&]() { 4539 if (VLIndex >= 0) { 4540 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 4541 Register VLReg = MI.getOperand(VLIndex).getReg(); 4542 4543 // VL might be a compile time constant, but isel would have to put it 4544 // in a register. See if VL comes from an ADDI X0, imm. 4545 if (VLReg.isVirtual()) { 4546 MachineInstr *Def = MRI.getVRegDef(VLReg); 4547 if (Def && Def->getOpcode() == RISCV::ADDI && 4548 Def->getOperand(1).getReg() == RISCV::X0 && 4549 Def->getOperand(2).isImm()) { 4550 uint64_t Imm = Def->getOperand(2).getImm(); 4551 // VSETIVLI allows a 5-bit zero extended immediate. 4552 if (isUInt<5>(Imm)) 4553 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) 4554 .addReg(DestReg, RegState::Define | RegState::Dead) 4555 .addImm(Imm); 4556 } 4557 } 4558 4559 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 4560 .addReg(DestReg, RegState::Define | RegState::Dead) 4561 .addReg(VLReg); 4562 } 4563 4564 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 4565 return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) 4566 .addReg(RISCV::X0, RegState::Define | RegState::Dead) 4567 .addReg(RISCV::X0, RegState::Kill); 4568 }; 4569 4570 MachineInstrBuilder MIB = BuildVSETVLI(); 4571 4572 // Default to tail agnostic unless the destination is tied to a source. In 4573 // that case the user would have some control over the tail values. The tail 4574 // policy is also ignored on instructions that only update element 0 like 4575 // vmv.s.x or reductions so use agnostic there to match the common case. 4576 // FIXME: This is conservatively correct, but we might want to detect that 4577 // the input is undefined. 4578 bool TailAgnostic = true; 4579 unsigned UseOpIdx; 4580 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 4581 TailAgnostic = false; 4582 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 
4583 const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
4584 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
4585 if (UseMI && UseMI->isImplicitDef())
4586 TailAgnostic = true;
4587 }
4588
4589 // For simplicity we reuse the vtype representation here.
4590 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
4591 /*TailAgnostic*/ TailAgnostic,
4592 /*MaskAgnostic*/ false));
4593
4594 // Remove (now) redundant operands from the pseudo.
4595 if (VLIndex >= 0) {
4596 MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
4597 MI.getOperand(VLIndex).setIsKill(false);
4598 }
4599
4600 return BB;
4601 }
4602
4603 MachineBasicBlock *
4604 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
4605 MachineBasicBlock *BB) const {
4606 uint64_t TSFlags = MI.getDesc().TSFlags;
4607
4608 if (TSFlags & RISCVII::HasSEWOpMask) {
4609 unsigned NumOperands = MI.getNumExplicitOperands();
4610 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
4611 unsigned SEWIndex = NumOperands - 1;
4612 bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
4613
4614 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
4615 RISCVII::VLMulShift);
4616 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
4617 }
4618
4619 switch (MI.getOpcode()) {
4620 default:
4621 llvm_unreachable("Unexpected instr type to insert");
4622 case RISCV::ReadCycleWide:
4623 assert(!Subtarget.is64Bit() &&
4624 "ReadCycleWide is only to be used on riscv32");
4625 return emitReadCycleWidePseudo(MI, BB);
4626 case RISCV::Select_GPR_Using_CC_GPR:
4627 case RISCV::Select_FPR16_Using_CC_GPR:
4628 case RISCV::Select_FPR32_Using_CC_GPR:
4629 case RISCV::Select_FPR64_Using_CC_GPR:
4630 return emitSelectPseudo(MI, BB);
4631 case RISCV::BuildPairF64Pseudo:
4632 return emitBuildPairF64Pseudo(MI, BB);
4633 case RISCV::SplitF64Pseudo:
4634 return emitSplitF64Pseudo(MI, BB);
4635 }
4636 }
4637
4638 // Calling Convention Implementation.
4639 // The expectations for frontend ABI lowering vary from target to target.
4640 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
4641 // details, but this is a longer term goal. For now, we simply try to keep the
4642 // role of the frontend as simple and well-defined as possible. The rules can
4643 // be summarised as:
4644 // * Never split up large scalar arguments. We handle them here.
4645 // * If a hardfloat calling convention is being used, and the struct may be
4646 // passed in a pair of registers (fp+fp, int+fp), and both registers are
4647 // available, then pass as two separate arguments. If either the GPRs or FPRs
4648 // are exhausted, then pass according to the rule below.
4649 // * If a struct could never be passed in registers or directly in a stack
4650 // slot (as it is larger than 2*XLEN and the floating point rules don't
4651 // apply), then pass it using a pointer with the byval attribute.
4652 // * If a struct is less than 2*XLEN, then coerce to either a two-element
4653 // word-sized array or a 2*XLEN scalar (depending on alignment).
4654 // * The frontend can determine whether a struct is returned by reference or
4655 // not based on its size and fields. If it will be returned by reference, the
4656 // frontend must modify the prototype so a pointer with the sret annotation is
4657 // passed as the first argument. This is not necessary for large scalar
4658 // returns.
4659 // * Struct return values and varargs should be coerced to structs containing 4660 // register-size fields in the same situations they would be for fixed 4661 // arguments. 4662 4663 static const MCPhysReg ArgGPRs[] = { 4664 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 4665 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 4666 }; 4667 static const MCPhysReg ArgFPR16s[] = { 4668 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 4669 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 4670 }; 4671 static const MCPhysReg ArgFPR32s[] = { 4672 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 4673 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 4674 }; 4675 static const MCPhysReg ArgFPR64s[] = { 4676 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 4677 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 4678 }; 4679 // This is an interim calling convention and it may be changed in the future. 4680 static const MCPhysReg ArgVRs[] = { 4681 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 4682 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 4683 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 4684 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 4685 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 4686 RISCV::V20M2, RISCV::V22M2}; 4687 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 4688 RISCV::V20M4}; 4689 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 4690 4691 // Pass a 2*XLEN argument that has been split into two XLEN values through 4692 // registers or the stack as necessary. 4693 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 4694 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 4695 MVT ValVT2, MVT LocVT2, 4696 ISD::ArgFlagsTy ArgFlags2) { 4697 unsigned XLenInBytes = XLen / 8; 4698 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4699 // At least one half can be passed via register. 4700 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 4701 VA1.getLocVT(), CCValAssign::Full)); 4702 } else { 4703 // Both halves must be passed on the stack, with proper alignment. 4704 Align StackAlign = 4705 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 4706 State.addLoc( 4707 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 4708 State.AllocateStack(XLenInBytes, StackAlign), 4709 VA1.getLocVT(), CCValAssign::Full)); 4710 State.addLoc(CCValAssign::getMem( 4711 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4712 LocVT2, CCValAssign::Full)); 4713 return false; 4714 } 4715 4716 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4717 // The second half can also be passed via register. 4718 State.addLoc( 4719 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 4720 } else { 4721 // The second half is passed via the stack, without additional alignment. 4722 State.addLoc(CCValAssign::getMem( 4723 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 4724 LocVT2, CCValAssign::Full)); 4725 } 4726 4727 return false; 4728 } 4729 4730 // Implements the RISC-V calling convention. Returns true upon failure. 
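// As a rough sketch of the common case (assuming the ILP32D ABI with all
// argument registers still free): an i32 goes in the next GPR (a0-a7), a
// fixed f64 goes in the next FPR (fa0-fa7), and an i64 is split into two i32
// parts handled by CC_RISCVAssign2XLen above, landing in a GPR pair or on the
// stack once GPRs run out. The code below generalises this to variadic
// arguments, the soft-float ABIs and scalable vector types.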
4731 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 4732 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 4733 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 4734 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 4735 Optional<unsigned> FirstMaskArgument) { 4736 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 4737 assert(XLen == 32 || XLen == 64); 4738 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 4739 4740 // Any return value split in to more than two values can't be returned 4741 // directly. 4742 if (IsRet && ValNo > 1) 4743 return true; 4744 4745 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 4746 // variadic argument, or if no F16/F32 argument registers are available. 4747 bool UseGPRForF16_F32 = true; 4748 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 4749 // variadic argument, or if no F64 argument registers are available. 4750 bool UseGPRForF64 = true; 4751 4752 switch (ABI) { 4753 default: 4754 llvm_unreachable("Unexpected ABI"); 4755 case RISCVABI::ABI_ILP32: 4756 case RISCVABI::ABI_LP64: 4757 break; 4758 case RISCVABI::ABI_ILP32F: 4759 case RISCVABI::ABI_LP64F: 4760 UseGPRForF16_F32 = !IsFixed; 4761 break; 4762 case RISCVABI::ABI_ILP32D: 4763 case RISCVABI::ABI_LP64D: 4764 UseGPRForF16_F32 = !IsFixed; 4765 UseGPRForF64 = !IsFixed; 4766 break; 4767 } 4768 4769 // FPR16, FPR32, and FPR64 alias each other. 4770 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 4771 UseGPRForF16_F32 = true; 4772 UseGPRForF64 = true; 4773 } 4774 4775 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 4776 // similar local variables rather than directly checking against the target 4777 // ABI. 4778 4779 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 4780 LocVT = XLenVT; 4781 LocInfo = CCValAssign::BCvt; 4782 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 4783 LocVT = MVT::i64; 4784 LocInfo = CCValAssign::BCvt; 4785 } 4786 4787 // If this is a variadic argument, the RISC-V calling convention requires 4788 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 4789 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 4790 // be used regardless of whether the original argument was split during 4791 // legalisation or not. The argument will not be passed by registers if the 4792 // original type is larger than 2*XLEN, so the register alignment rule does 4793 // not apply. 4794 unsigned TwoXLenInBytes = (2 * XLen) / 8; 4795 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 4796 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 4797 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 4798 // Skip 'odd' register if necessary. 4799 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 4800 State.AllocateReg(ArgGPRs); 4801 } 4802 4803 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 4804 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 4805 State.getPendingArgFlags(); 4806 4807 assert(PendingLocs.size() == PendingArgFlags.size() && 4808 "PendingLocs and PendingArgFlags out of sync"); 4809 4810 // Handle passing f64 on RV32D with a soft float ABI or when floating point 4811 // registers are exhausted. 
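  // (For instance, under the ILP32 ABI a fixed f64 argument takes the next
  // two free GPRs; if only a7 is left it takes a7 plus a 4-byte stack slot,
  // and with no GPRs left it takes an 8-byte, 8-byte-aligned stack slot.)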
4812 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 4813 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 4814 "Can't lower f64 if it is split"); 4815 // Depending on available argument GPRS, f64 may be passed in a pair of 4816 // GPRs, split between a GPR and the stack, or passed completely on the 4817 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 4818 // cases. 4819 Register Reg = State.AllocateReg(ArgGPRs); 4820 LocVT = MVT::i32; 4821 if (!Reg) { 4822 unsigned StackOffset = State.AllocateStack(8, Align(8)); 4823 State.addLoc( 4824 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4825 return false; 4826 } 4827 if (!State.AllocateReg(ArgGPRs)) 4828 State.AllocateStack(4, Align(4)); 4829 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4830 return false; 4831 } 4832 4833 // Split arguments might be passed indirectly, so keep track of the pending 4834 // values. 4835 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 4836 LocVT = XLenVT; 4837 LocInfo = CCValAssign::Indirect; 4838 PendingLocs.push_back( 4839 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 4840 PendingArgFlags.push_back(ArgFlags); 4841 if (!ArgFlags.isSplitEnd()) { 4842 return false; 4843 } 4844 } 4845 4846 // If the split argument only had two elements, it should be passed directly 4847 // in registers or on the stack. 4848 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 4849 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 4850 // Apply the normal calling convention rules to the first half of the 4851 // split argument. 4852 CCValAssign VA = PendingLocs[0]; 4853 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 4854 PendingLocs.clear(); 4855 PendingArgFlags.clear(); 4856 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 4857 ArgFlags); 4858 } 4859 4860 // Allocate to a register if possible, or else a stack slot. 4861 Register Reg; 4862 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 4863 Reg = State.AllocateReg(ArgFPR16s); 4864 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 4865 Reg = State.AllocateReg(ArgFPR32s); 4866 else if (ValVT == MVT::f64 && !UseGPRForF64) 4867 Reg = State.AllocateReg(ArgFPR64s); 4868 else if (ValVT.isScalableVector()) { 4869 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 4870 if (RC == &RISCV::VRRegClass) { 4871 // Assign the first mask argument to V0. 4872 // This is an interim calling convention and it may be changed in the 4873 // future. 4874 if (FirstMaskArgument.hasValue() && 4875 ValNo == FirstMaskArgument.getValue()) { 4876 Reg = State.AllocateReg(RISCV::V0); 4877 } else { 4878 Reg = State.AllocateReg(ArgVRs); 4879 } 4880 } else if (RC == &RISCV::VRM2RegClass) { 4881 Reg = State.AllocateReg(ArgVRM2s); 4882 } else if (RC == &RISCV::VRM4RegClass) { 4883 Reg = State.AllocateReg(ArgVRM4s); 4884 } else if (RC == &RISCV::VRM8RegClass) { 4885 Reg = State.AllocateReg(ArgVRM8s); 4886 } else { 4887 llvm_unreachable("Unhandled class register for ValueType"); 4888 } 4889 if (!Reg) { 4890 LocInfo = CCValAssign::Indirect; 4891 // Try using a GPR to pass the address 4892 Reg = State.AllocateReg(ArgGPRs); 4893 LocVT = XLenVT; 4894 } 4895 } else 4896 Reg = State.AllocateReg(ArgGPRs); 4897 unsigned StackOffset = 4898 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 4899 4900 // If we reach this point and PendingLocs is non-empty, we must be at the 4901 // end of a split argument that must be passed indirectly. 
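  // (Illustrative example: an i128 argument on RV32 is legalised into four
  // i32 parts; every pending part below is given the same GPR or stack slot,
  // and LowerCall/LowerFormalArguments then store and reload the full value
  // through that single address.)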
4902 if (!PendingLocs.empty()) { 4903 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 4904 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 4905 4906 for (auto &It : PendingLocs) { 4907 if (Reg) 4908 It.convertToReg(Reg); 4909 else 4910 It.convertToMem(StackOffset); 4911 State.addLoc(It); 4912 } 4913 PendingLocs.clear(); 4914 PendingArgFlags.clear(); 4915 return false; 4916 } 4917 4918 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 4919 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 4920 "Expected an XLenVT or scalable vector types at this stage"); 4921 4922 if (Reg) { 4923 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 4924 return false; 4925 } 4926 4927 // When a floating-point value is passed on the stack, no bit-conversion is 4928 // needed. 4929 if (ValVT.isFloatingPoint()) { 4930 LocVT = ValVT; 4931 LocInfo = CCValAssign::Full; 4932 } 4933 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 4934 return false; 4935 } 4936 4937 template <typename ArgTy> 4938 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 4939 for (const auto &ArgIdx : enumerate(Args)) { 4940 MVT ArgVT = ArgIdx.value().VT; 4941 if (ArgVT.isScalableVector() && 4942 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 4943 return ArgIdx.index(); 4944 } 4945 return None; 4946 } 4947 4948 void RISCVTargetLowering::analyzeInputArgs( 4949 MachineFunction &MF, CCState &CCInfo, 4950 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 4951 unsigned NumArgs = Ins.size(); 4952 FunctionType *FType = MF.getFunction().getFunctionType(); 4953 4954 Optional<unsigned> FirstMaskArgument; 4955 if (Subtarget.hasStdExtV()) 4956 FirstMaskArgument = preAssignMask(Ins); 4957 4958 for (unsigned i = 0; i != NumArgs; ++i) { 4959 MVT ArgVT = Ins[i].VT; 4960 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 4961 4962 Type *ArgTy = nullptr; 4963 if (IsRet) 4964 ArgTy = FType->getReturnType(); 4965 else if (Ins[i].isOrigArg()) 4966 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 4967 4968 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 4969 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 4970 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 4971 FirstMaskArgument)) { 4972 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 4973 << EVT(ArgVT).getEVTString() << '\n'); 4974 llvm_unreachable(nullptr); 4975 } 4976 } 4977 } 4978 4979 void RISCVTargetLowering::analyzeOutputArgs( 4980 MachineFunction &MF, CCState &CCInfo, 4981 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 4982 CallLoweringInfo *CLI) const { 4983 unsigned NumArgs = Outs.size(); 4984 4985 Optional<unsigned> FirstMaskArgument; 4986 if (Subtarget.hasStdExtV()) 4987 FirstMaskArgument = preAssignMask(Outs); 4988 4989 for (unsigned i = 0; i != NumArgs; i++) { 4990 MVT ArgVT = Outs[i].VT; 4991 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4992 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 4993 4994 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 4995 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 4996 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 4997 FirstMaskArgument)) { 4998 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 4999 << EVT(ArgVT).getEVTString() << "\n"); 5000 llvm_unreachable(nullptr); 5001 } 5002 } 5003 } 5004 5005 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 5006 // values. 5007 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 5008 const CCValAssign &VA, const SDLoc &DL) { 5009 switch (VA.getLocInfo()) { 5010 default: 5011 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5012 case CCValAssign::Full: 5013 break; 5014 case CCValAssign::BCvt: 5015 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5016 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 5017 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5018 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 5019 else 5020 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 5021 break; 5022 } 5023 return Val; 5024 } 5025 5026 // The caller is responsible for loading the full value if the argument is 5027 // passed with CCValAssign::Indirect. 5028 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5029 const CCValAssign &VA, const SDLoc &DL, 5030 const RISCVTargetLowering &TLI) { 5031 MachineFunction &MF = DAG.getMachineFunction(); 5032 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5033 EVT LocVT = VA.getLocVT(); 5034 SDValue Val; 5035 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5036 Register VReg = RegInfo.createVirtualRegister(RC); 5037 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5038 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5039 5040 if (VA.getLocInfo() == CCValAssign::Indirect) 5041 return Val; 5042 5043 return convertLocVTToValVT(DAG, Val, VA, DL); 5044 } 5045 5046 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5047 const CCValAssign &VA, const SDLoc &DL) { 5048 EVT LocVT = VA.getLocVT(); 5049 5050 switch (VA.getLocInfo()) { 5051 default: 5052 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5053 case CCValAssign::Full: 5054 break; 5055 case CCValAssign::BCvt: 5056 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 5057 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 5058 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5059 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 5060 else 5061 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5062 break; 5063 } 5064 return Val; 5065 } 5066 5067 // The caller is responsible for loading the full value if the argument is 5068 // passed with CCValAssign::Indirect. 
5069 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5070 const CCValAssign &VA, const SDLoc &DL) {
5071 MachineFunction &MF = DAG.getMachineFunction();
5072 MachineFrameInfo &MFI = MF.getFrameInfo();
5073 EVT LocVT = VA.getLocVT();
5074 EVT ValVT = VA.getValVT();
5075 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
5076 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
5077 VA.getLocMemOffset(), /*Immutable=*/true);
5078 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
5079 SDValue Val;
5080
5081 ISD::LoadExtType ExtType;
5082 switch (VA.getLocInfo()) {
5083 default:
5084 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5085 case CCValAssign::Full:
5086 case CCValAssign::Indirect:
5087 case CCValAssign::BCvt:
5088 ExtType = ISD::NON_EXTLOAD;
5089 break;
5090 }
5091 Val = DAG.getExtLoad(
5092 ExtType, DL, LocVT, Chain, FIN,
5093 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5094 return Val;
5095 }
5096
5097 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
5098 const CCValAssign &VA, const SDLoc &DL) {
5099 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
5100 "Unexpected VA");
5101 MachineFunction &MF = DAG.getMachineFunction();
5102 MachineFrameInfo &MFI = MF.getFrameInfo();
5103 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5104
5105 if (VA.isMemLoc()) {
5106 // f64 is passed on the stack.
5107 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
5108 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5109 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
5110 MachinePointerInfo::getFixedStack(MF, FI));
5111 }
5112
5113 assert(VA.isRegLoc() && "Expected register VA assignment");
5114
5115 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5116 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
5117 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
5118 SDValue Hi;
5119 if (VA.getLocReg() == RISCV::X17) {
5120 // Second half of f64 is passed on the stack.
5121 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
5122 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
5123 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
5124 MachinePointerInfo::getFixedStack(MF, FI));
5125 } else {
5126 // Second half of f64 is passed in another GPR.
5127 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
5128 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
5129 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
5130 }
5131 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
5132 }
5133
5134 // FastCC has less than a 1% performance improvement on some particular
5135 // benchmarks, but theoretically it may have a benefit in some cases.
5136 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
5137 CCValAssign::LocInfo LocInfo,
5138 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5139
5140 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5141 // X5 and X6 might be used for save-restore libcall.
5142 static const MCPhysReg GPRList[] = { 5143 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 5144 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 5145 RISCV::X29, RISCV::X30, RISCV::X31}; 5146 if (unsigned Reg = State.AllocateReg(GPRList)) { 5147 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5148 return false; 5149 } 5150 } 5151 5152 if (LocVT == MVT::f16) { 5153 static const MCPhysReg FPR16List[] = { 5154 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 5155 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 5156 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 5157 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 5158 if (unsigned Reg = State.AllocateReg(FPR16List)) { 5159 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5160 return false; 5161 } 5162 } 5163 5164 if (LocVT == MVT::f32) { 5165 static const MCPhysReg FPR32List[] = { 5166 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 5167 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 5168 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 5169 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 5170 if (unsigned Reg = State.AllocateReg(FPR32List)) { 5171 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5172 return false; 5173 } 5174 } 5175 5176 if (LocVT == MVT::f64) { 5177 static const MCPhysReg FPR64List[] = { 5178 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 5179 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 5180 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 5181 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 5182 if (unsigned Reg = State.AllocateReg(FPR64List)) { 5183 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5184 return false; 5185 } 5186 } 5187 5188 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 5189 unsigned Offset4 = State.AllocateStack(4, Align(4)); 5190 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 5191 return false; 5192 } 5193 5194 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 5195 unsigned Offset5 = State.AllocateStack(8, Align(8)); 5196 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 5197 return false; 5198 } 5199 5200 return true; // CC didn't match. 5201 } 5202 5203 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 5204 CCValAssign::LocInfo LocInfo, 5205 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5206 5207 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5208 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 5209 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 5210 static const MCPhysReg GPRList[] = { 5211 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 5212 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 5213 if (unsigned Reg = State.AllocateReg(GPRList)) { 5214 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5215 return false; 5216 } 5217 } 5218 5219 if (LocVT == MVT::f32) { 5220 // Pass in STG registers: F1, ..., F6 5221 // fs0 ... 
fs5 5222 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 5223 RISCV::F18_F, RISCV::F19_F, 5224 RISCV::F20_F, RISCV::F21_F}; 5225 if (unsigned Reg = State.AllocateReg(FPR32List)) { 5226 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5227 return false; 5228 } 5229 } 5230 5231 if (LocVT == MVT::f64) { 5232 // Pass in STG registers: D1, ..., D6 5233 // fs6 ... fs11 5234 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 5235 RISCV::F24_D, RISCV::F25_D, 5236 RISCV::F26_D, RISCV::F27_D}; 5237 if (unsigned Reg = State.AllocateReg(FPR64List)) { 5238 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5239 return false; 5240 } 5241 } 5242 5243 report_fatal_error("No registers left in GHC calling convention"); 5244 return true; 5245 } 5246 5247 // Transform physical registers into virtual registers. 5248 SDValue RISCVTargetLowering::LowerFormalArguments( 5249 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 5250 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 5251 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 5252 5253 MachineFunction &MF = DAG.getMachineFunction(); 5254 5255 switch (CallConv) { 5256 default: 5257 report_fatal_error("Unsupported calling convention"); 5258 case CallingConv::C: 5259 case CallingConv::Fast: 5260 break; 5261 case CallingConv::GHC: 5262 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 5263 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 5264 report_fatal_error( 5265 "GHC calling convention requires the F and D instruction set extensions"); 5266 } 5267 5268 const Function &Func = MF.getFunction(); 5269 if (Func.hasFnAttribute("interrupt")) { 5270 if (!Func.arg_empty()) 5271 report_fatal_error( 5272 "Functions with the interrupt attribute cannot have arguments!"); 5273 5274 StringRef Kind = 5275 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 5276 5277 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 5278 report_fatal_error( 5279 "Function interrupt attribute argument not supported!"); 5280 } 5281 5282 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5283 MVT XLenVT = Subtarget.getXLenVT(); 5284 unsigned XLenInBytes = Subtarget.getXLen() / 8; 5285 // Used with vargs to acumulate store chains. 5286 std::vector<SDValue> OutChains; 5287 5288 // Assign locations to all of the incoming arguments. 5289 SmallVector<CCValAssign, 16> ArgLocs; 5290 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5291 5292 if (CallConv == CallingConv::Fast) 5293 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); 5294 else if (CallConv == CallingConv::GHC) 5295 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 5296 else 5297 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); 5298 5299 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 5300 CCValAssign &VA = ArgLocs[i]; 5301 SDValue ArgValue; 5302 // Passing f64 on RV32D with a soft float ABI must be handled as a special 5303 // case. 5304 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 5305 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 5306 else if (VA.isRegLoc()) 5307 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 5308 else 5309 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 5310 5311 if (VA.getLocInfo() == CCValAssign::Indirect) { 5312 // If the original argument was split and passed by reference (e.g. i128 5313 // on RV32), we need to load all parts of it here (using the same 5314 // address). 
5315 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 5316 MachinePointerInfo())); 5317 unsigned ArgIndex = Ins[i].OrigArgIndex; 5318 assert(Ins[i].PartOffset == 0); 5319 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 5320 CCValAssign &PartVA = ArgLocs[i + 1]; 5321 unsigned PartOffset = Ins[i + 1].PartOffset; 5322 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 5323 DAG.getIntPtrConstant(PartOffset, DL)); 5324 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 5325 MachinePointerInfo())); 5326 ++i; 5327 } 5328 continue; 5329 } 5330 InVals.push_back(ArgValue); 5331 } 5332 5333 if (IsVarArg) { 5334 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 5335 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 5336 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 5337 MachineFrameInfo &MFI = MF.getFrameInfo(); 5338 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5339 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 5340 5341 // Offset of the first variable argument from stack pointer, and size of 5342 // the vararg save area. For now, the varargs save area is either zero or 5343 // large enough to hold a0-a7. 5344 int VaArgOffset, VarArgsSaveSize; 5345 5346 // If all registers are allocated, then all varargs must be passed on the 5347 // stack and we don't need to save any argregs. 5348 if (ArgRegs.size() == Idx) { 5349 VaArgOffset = CCInfo.getNextStackOffset(); 5350 VarArgsSaveSize = 0; 5351 } else { 5352 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 5353 VaArgOffset = -VarArgsSaveSize; 5354 } 5355 5356 // Record the frame index of the first variable argument 5357 // which is a value necessary to VASTART. 5358 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 5359 RVFI->setVarArgsFrameIndex(FI); 5360 5361 // If saving an odd number of registers then create an extra stack slot to 5362 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 5363 // offsets to even-numbered registered remain 2*XLEN-aligned. 5364 if (Idx % 2) { 5365 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 5366 VarArgsSaveSize += XLenInBytes; 5367 } 5368 5369 // Copy the integer registers that may have been used for passing varargs 5370 // to the vararg save area. 5371 for (unsigned I = Idx; I < ArgRegs.size(); 5372 ++I, VaArgOffset += XLenInBytes) { 5373 const Register Reg = RegInfo.createVirtualRegister(RC); 5374 RegInfo.addLiveIn(ArgRegs[I], Reg); 5375 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 5376 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 5377 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5378 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 5379 MachinePointerInfo::getFixedStack(MF, FI)); 5380 cast<StoreSDNode>(Store.getNode()) 5381 ->getMemOperand() 5382 ->setValue((Value *)nullptr); 5383 OutChains.push_back(Store); 5384 } 5385 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 5386 } 5387 5388 // All stores are grouped in one node to allow the matching between 5389 // the size of Ins and InVals. This only happens for vararg functions. 5390 if (!OutChains.empty()) { 5391 OutChains.push_back(Chain); 5392 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 5393 } 5394 5395 return Chain; 5396 } 5397 5398 /// isEligibleForTailCallOptimization - Check whether the call is eligible 5399 /// for tail call optimization. 
5400 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 5401 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 5402 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 5403 const SmallVector<CCValAssign, 16> &ArgLocs) const { 5404 5405 auto &Callee = CLI.Callee; 5406 auto CalleeCC = CLI.CallConv; 5407 auto &Outs = CLI.Outs; 5408 auto &Caller = MF.getFunction(); 5409 auto CallerCC = Caller.getCallingConv(); 5410 5411 // Exception-handling functions need a special set of instructions to 5412 // indicate a return to the hardware. Tail-calling another function would 5413 // probably break this. 5414 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 5415 // should be expanded as new function attributes are introduced. 5416 if (Caller.hasFnAttribute("interrupt")) 5417 return false; 5418 5419 // Do not tail call opt if the stack is used to pass parameters. 5420 if (CCInfo.getNextStackOffset() != 0) 5421 return false; 5422 5423 // Do not tail call opt if any parameters need to be passed indirectly. 5424 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 5425 // passed indirectly. So the address of the value will be passed in a 5426 // register, or if not available, then the address is put on the stack. In 5427 // order to pass indirectly, space on the stack often needs to be allocated 5428 // in order to store the value. In this case the CCInfo.getNextStackOffset() 5429 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 5430 // are passed CCValAssign::Indirect. 5431 for (auto &VA : ArgLocs) 5432 if (VA.getLocInfo() == CCValAssign::Indirect) 5433 return false; 5434 5435 // Do not tail call opt if either caller or callee uses struct return 5436 // semantics. 5437 auto IsCallerStructRet = Caller.hasStructRetAttr(); 5438 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 5439 if (IsCallerStructRet || IsCalleeStructRet) 5440 return false; 5441 5442 // Externally-defined functions with weak linkage should not be 5443 // tail-called. The behaviour of branch instructions in this situation (as 5444 // used for tail calls) is implementation-defined, so we cannot rely on the 5445 // linker replacing the tail call with a return. 5446 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 5447 const GlobalValue *GV = G->getGlobal(); 5448 if (GV->hasExternalWeakLinkage()) 5449 return false; 5450 } 5451 5452 // The callee has to preserve all registers the caller needs to preserve. 5453 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5454 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 5455 if (CalleeCC != CallerCC) { 5456 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 5457 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 5458 return false; 5459 } 5460 5461 // Byval parameters hand the function a pointer directly into the stack area 5462 // we want to reuse during a tail call. Working around this *is* possible 5463 // but less efficient and uglier in LowerCall. 5464 for (auto &Arg : Outs) 5465 if (Arg.Flags.isByVal()) 5466 return false; 5467 5468 return true; 5469 } 5470 5471 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 5472 // and output parameter nodes. 
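// Schematically (illustrative only), a simple non-tail call becomes:
//   callseq_start
//   CopyToReg a0, a1, ...                   (argument copies, glued together)
//   RISCVISD::CALL chain, target, a0, ..., regmask, glue
//   callseq_end
//   CopyFromReg a0, ...                     (one per return value location)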
5473 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 5474 SmallVectorImpl<SDValue> &InVals) const { 5475 SelectionDAG &DAG = CLI.DAG; 5476 SDLoc &DL = CLI.DL; 5477 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5478 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5479 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5480 SDValue Chain = CLI.Chain; 5481 SDValue Callee = CLI.Callee; 5482 bool &IsTailCall = CLI.IsTailCall; 5483 CallingConv::ID CallConv = CLI.CallConv; 5484 bool IsVarArg = CLI.IsVarArg; 5485 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5486 MVT XLenVT = Subtarget.getXLenVT(); 5487 5488 MachineFunction &MF = DAG.getMachineFunction(); 5489 5490 // Analyze the operands of the call, assigning locations to each operand. 5491 SmallVector<CCValAssign, 16> ArgLocs; 5492 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5493 5494 if (CallConv == CallingConv::Fast) 5495 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 5496 else if (CallConv == CallingConv::GHC) 5497 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 5498 else 5499 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 5500 5501 // Check if it's really possible to do a tail call. 5502 if (IsTailCall) 5503 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 5504 5505 if (IsTailCall) 5506 ++NumTailCalls; 5507 else if (CLI.CB && CLI.CB->isMustTailCall()) 5508 report_fatal_error("failed to perform tail call elimination on a call " 5509 "site marked musttail"); 5510 5511 // Get a count of how many bytes are to be pushed on the stack. 5512 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 5513 5514 // Create local copies for byval args 5515 SmallVector<SDValue, 8> ByValArgs; 5516 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5517 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5518 if (!Flags.isByVal()) 5519 continue; 5520 5521 SDValue Arg = OutVals[i]; 5522 unsigned Size = Flags.getByValSize(); 5523 Align Alignment = Flags.getNonZeroByValAlign(); 5524 5525 int FI = 5526 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 5527 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5528 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 5529 5530 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 5531 /*IsVolatile=*/false, 5532 /*AlwaysInline=*/false, IsTailCall, 5533 MachinePointerInfo(), MachinePointerInfo()); 5534 ByValArgs.push_back(FIPtr); 5535 } 5536 5537 if (!IsTailCall) 5538 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 5539 5540 // Copy argument values to their designated locations. 5541 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 5542 SmallVector<SDValue, 8> MemOpChains; 5543 SDValue StackPtr; 5544 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 5545 CCValAssign &VA = ArgLocs[i]; 5546 SDValue ArgValue = OutVals[i]; 5547 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5548 5549 // Handle passing f64 on RV32D with a soft float ABI as a special case. 5550 bool IsF64OnRV32DSoftABI = 5551 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 5552 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 5553 SDValue SplitF64 = DAG.getNode( 5554 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 5555 SDValue Lo = SplitF64.getValue(0); 5556 SDValue Hi = SplitF64.getValue(1); 5557 5558 Register RegLo = VA.getLocReg(); 5559 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 5560 5561 if (RegLo == RISCV::X17) { 5562 // Second half of f64 is passed on the stack. 
5563 // Work out the address of the stack slot. 5564 if (!StackPtr.getNode()) 5565 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 5566 // Emit the store. 5567 MemOpChains.push_back( 5568 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 5569 } else { 5570 // Second half of f64 is passed in another GPR. 5571 assert(RegLo < RISCV::X31 && "Invalid register pair"); 5572 Register RegHigh = RegLo + 1; 5573 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 5574 } 5575 continue; 5576 } 5577 5578 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 5579 // as any other MemLoc. 5580 5581 // Promote the value if needed. 5582 // For now, only handle fully promoted and indirect arguments. 5583 if (VA.getLocInfo() == CCValAssign::Indirect) { 5584 // Store the argument in a stack slot and pass its address. 5585 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 5586 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 5587 MemOpChains.push_back( 5588 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 5589 MachinePointerInfo::getFixedStack(MF, FI))); 5590 // If the original argument was split (e.g. i128), we need 5591 // to store all parts of it here (and pass just one address). 5592 unsigned ArgIndex = Outs[i].OrigArgIndex; 5593 assert(Outs[i].PartOffset == 0); 5594 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 5595 SDValue PartValue = OutVals[i + 1]; 5596 unsigned PartOffset = Outs[i + 1].PartOffset; 5597 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 5598 DAG.getIntPtrConstant(PartOffset, DL)); 5599 MemOpChains.push_back( 5600 DAG.getStore(Chain, DL, PartValue, Address, 5601 MachinePointerInfo::getFixedStack(MF, FI))); 5602 ++i; 5603 } 5604 ArgValue = SpillSlot; 5605 } else { 5606 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 5607 } 5608 5609 // Use local copy if it is a byval arg. 5610 if (Flags.isByVal()) 5611 ArgValue = ByValArgs[j++]; 5612 5613 if (VA.isRegLoc()) { 5614 // Queue up the argument copies and emit them at the end. 5615 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 5616 } else { 5617 assert(VA.isMemLoc() && "Argument not register or memory"); 5618 assert(!IsTailCall && "Tail call not allowed if stack is used " 5619 "for passing parameters"); 5620 5621 // Work out the address of the stack slot. 5622 if (!StackPtr.getNode()) 5623 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 5624 SDValue Address = 5625 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 5626 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 5627 5628 // Emit the store. 5629 MemOpChains.push_back( 5630 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 5631 } 5632 } 5633 5634 // Join the stores, which are independent of one another. 5635 if (!MemOpChains.empty()) 5636 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 5637 5638 SDValue Glue; 5639 5640 // Build a sequence of copy-to-reg nodes, chained and glued together. 5641 for (auto &Reg : RegsToPass) { 5642 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 5643 Glue = Chain.getValue(1); 5644 } 5645 5646 // Validate that none of the argument registers have been marked as 5647 // reserved, if so report an error. Do the same for the return address if this 5648 // is not a tailcall. 
5649 validateCCReservedRegs(RegsToPass, MF); 5650 if (!IsTailCall && 5651 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 5652 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 5653 MF.getFunction(), 5654 "Return address register required, but has been reserved."}); 5655 5656 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 5657 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 5658 // split it and then direct call can be matched by PseudoCALL. 5659 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 5660 const GlobalValue *GV = S->getGlobal(); 5661 5662 unsigned OpFlags = RISCVII::MO_CALL; 5663 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 5664 OpFlags = RISCVII::MO_PLT; 5665 5666 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 5667 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 5668 unsigned OpFlags = RISCVII::MO_CALL; 5669 5670 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 5671 nullptr)) 5672 OpFlags = RISCVII::MO_PLT; 5673 5674 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 5675 } 5676 5677 // The first call operand is the chain and the second is the target address. 5678 SmallVector<SDValue, 8> Ops; 5679 Ops.push_back(Chain); 5680 Ops.push_back(Callee); 5681 5682 // Add argument registers to the end of the list so that they are 5683 // known live into the call. 5684 for (auto &Reg : RegsToPass) 5685 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 5686 5687 if (!IsTailCall) { 5688 // Add a register mask operand representing the call-preserved registers. 5689 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5690 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 5691 assert(Mask && "Missing call preserved mask for calling convention"); 5692 Ops.push_back(DAG.getRegisterMask(Mask)); 5693 } 5694 5695 // Glue the call to the argument copies, if any. 5696 if (Glue.getNode()) 5697 Ops.push_back(Glue); 5698 5699 // Emit the call. 5700 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 5701 5702 if (IsTailCall) { 5703 MF.getFrameInfo().setHasTailCall(); 5704 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 5705 } 5706 5707 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 5708 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 5709 Glue = Chain.getValue(1); 5710 5711 // Mark the end of the call, which is glued to the call itself. 5712 Chain = DAG.getCALLSEQ_END(Chain, 5713 DAG.getConstant(NumBytes, DL, PtrVT, true), 5714 DAG.getConstant(0, DL, PtrVT, true), 5715 Glue, DL); 5716 Glue = Chain.getValue(1); 5717 5718 // Assign locations to each value returned by this call. 5719 SmallVector<CCValAssign, 16> RVLocs; 5720 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 5721 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 5722 5723 // Copy all of the result registers out of their specified physreg. 
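  // When an f64 is returned in GPRs on RV32 (e.g. under the ilp32 ABI), the
  // two halves arrive in a0 and a1 and are recombined below with
  // RISCVISD::BuildPairF64.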
5724 for (auto &VA : RVLocs) { 5725 // Copy the value out 5726 SDValue RetValue = 5727 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 5728 // Glue the RetValue to the end of the call sequence 5729 Chain = RetValue.getValue(1); 5730 Glue = RetValue.getValue(2); 5731 5732 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 5733 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 5734 SDValue RetValue2 = 5735 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 5736 Chain = RetValue2.getValue(1); 5737 Glue = RetValue2.getValue(2); 5738 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 5739 RetValue2); 5740 } 5741 5742 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 5743 5744 InVals.push_back(RetValue); 5745 } 5746 5747 return Chain; 5748 } 5749 5750 bool RISCVTargetLowering::CanLowerReturn( 5751 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 5752 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 5753 SmallVector<CCValAssign, 16> RVLocs; 5754 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 5755 5756 Optional<unsigned> FirstMaskArgument; 5757 if (Subtarget.hasStdExtV()) 5758 FirstMaskArgument = preAssignMask(Outs); 5759 5760 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5761 MVT VT = Outs[i].VT; 5762 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 5763 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 5764 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 5765 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 5766 *this, FirstMaskArgument)) 5767 return false; 5768 } 5769 return true; 5770 } 5771 5772 SDValue 5773 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 5774 bool IsVarArg, 5775 const SmallVectorImpl<ISD::OutputArg> &Outs, 5776 const SmallVectorImpl<SDValue> &OutVals, 5777 const SDLoc &DL, SelectionDAG &DAG) const { 5778 const MachineFunction &MF = DAG.getMachineFunction(); 5779 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 5780 5781 // Stores the assignment of the return value to a location. 5782 SmallVector<CCValAssign, 16> RVLocs; 5783 5784 // Info about the registers and stack slot. 5785 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 5786 *DAG.getContext()); 5787 5788 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 5789 nullptr); 5790 5791 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 5792 report_fatal_error("GHC functions return void only"); 5793 5794 SDValue Glue; 5795 SmallVector<SDValue, 4> RetOps(1, Chain); 5796 5797 // Copy the result values into the output registers. 5798 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 5799 SDValue Val = OutVals[i]; 5800 CCValAssign &VA = RVLocs[i]; 5801 assert(VA.isRegLoc() && "Can only return in registers!"); 5802 5803 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 5804 // Handle returning f64 on RV32D with a soft float ABI. 
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(SHFLI)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_XF_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
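/// The single-letter constraints recognised below are:
///   'f' - a floating-point register (register-class constraint),
///   'I' - a 12-bit signed immediate,
///   'J' - the immediate zero,
///   'K' - a 5-bit unsigned immediate,
///   'A' - a memory address held in a general-purpose register.
/// Everything else is delegated to TargetLowering::getConstraintType.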
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
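  // For example, "{f10}" and "{fa0}" both name the same physical register;
  // it is returned as F10_D in FPR64 when the D extension is present,
  // otherwise as F10_F in FPR32.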
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
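      // (Such immediates appear, for example, as the 5-bit immediate operand
      // of the CSR immediate instructions, e.g. csrrwi.)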
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is an f32 type for the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > Subtarget.getMinRVVVectorSizeInBits())
      return false;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
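  // For example, a <3 x i32> vector is rejected here; non-power-of-two fixed
  // vectors are currently left to generic type legalization (which typically
  // widens them) rather than being mapped onto RVV registers.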
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm