1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/IR/IntrinsicsRISCV.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/KnownBits.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "riscv-lower" 42 43 STATISTIC(NumTailCalls, "Number of tail calls"); 44 45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 46 const RISCVSubtarget &STI) 47 : TargetLowering(TM), Subtarget(STI) { 48 49 if (Subtarget.isRV32E()) 50 report_fatal_error("Codegen not yet implemented for RV32E"); 51 52 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 53 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 54 55 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 56 !Subtarget.hasStdExtF()) { 57 errs() << "Hard-float 'f' ABI can't be used for a target that " 58 "doesn't support the F instruction set extension (ignoring " 59 "target-abi)\n"; 60 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 61 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 62 !Subtarget.hasStdExtD()) { 63 errs() << "Hard-float 'd' ABI can't be used for a target that " 64 "doesn't support the D instruction set extension (ignoring " 65 "target-abi)\n"; 66 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 67 } 68 69 switch (ABI) { 70 default: 71 report_fatal_error("Don't know how to lower this ABI"); 72 case RISCVABI::ABI_ILP32: 73 case RISCVABI::ABI_ILP32F: 74 case RISCVABI::ABI_ILP32D: 75 case RISCVABI::ABI_LP64: 76 case RISCVABI::ABI_LP64F: 77 case RISCVABI::ABI_LP64D: 78 break; 79 } 80 81 MVT XLenVT = Subtarget.getXLenVT(); 82 83 // Set up the register classes. 
84 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 85 86 if (Subtarget.hasStdExtZfh()) 87 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 88 if (Subtarget.hasStdExtF()) 89 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 90 if (Subtarget.hasStdExtD()) 91 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 92 93 static const MVT::SimpleValueType BoolVecVTs[] = { 94 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 95 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 96 static const MVT::SimpleValueType IntVecVTs[] = { 97 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 98 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 99 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 100 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 101 MVT::nxv4i64, MVT::nxv8i64}; 102 static const MVT::SimpleValueType F16VecVTs[] = { 103 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 104 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 105 static const MVT::SimpleValueType F32VecVTs[] = { 106 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 107 static const MVT::SimpleValueType F64VecVTs[] = { 108 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 109 110 if (Subtarget.hasStdExtV()) { 111 auto addRegClassForRVV = [this](MVT VT) { 112 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 113 assert(Size <= 512 && isPowerOf2_32(Size)); 114 const TargetRegisterClass *RC; 115 if (Size <= 64) 116 RC = &RISCV::VRRegClass; 117 else if (Size == 128) 118 RC = &RISCV::VRM2RegClass; 119 else if (Size == 256) 120 RC = &RISCV::VRM4RegClass; 121 else 122 RC = &RISCV::VRM8RegClass; 123 124 addRegisterClass(VT, RC); 125 }; 126 127 for (MVT VT : BoolVecVTs) 128 addRegClassForRVV(VT); 129 for (MVT VT : IntVecVTs) 130 addRegClassForRVV(VT); 131 132 if (Subtarget.hasStdExtZfh()) 133 for (MVT VT : F16VecVTs) 134 addRegClassForRVV(VT); 135 136 if (Subtarget.hasStdExtF()) 137 for (MVT VT : F32VecVTs) 138 addRegClassForRVV(VT); 139 140 if (Subtarget.hasStdExtD()) 141 for (MVT VT : F64VecVTs) 142 addRegClassForRVV(VT); 143 144 if (Subtarget.useRVVForFixedLengthVectors()) { 145 auto addRegClassForFixedVectors = [this](MVT VT) { 146 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 147 const TargetRegisterClass *RC; 148 if (LMul == 1) 149 RC = &RISCV::VRRegClass; 150 else if (LMul == 2) 151 RC = &RISCV::VRM2RegClass; 152 else if (LMul == 4) 153 RC = &RISCV::VRM4RegClass; 154 else if (LMul == 8) 155 RC = &RISCV::VRM8RegClass; 156 else 157 llvm_unreachable("Unexpected LMul!"); 158 159 addRegisterClass(VT, RC); 160 }; 161 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 162 if (useRVVForFixedLengthVectorVT(VT)) 163 addRegClassForFixedVectors(VT); 164 165 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 166 if (useRVVForFixedLengthVectorVT(VT)) 167 addRegClassForFixedVectors(VT); 168 } 169 } 170 171 // Compute derived properties from the register classes. 172 computeRegisterProperties(STI.getRegisterInfo()); 173 174 setStackPointerRegisterToSaveRestore(RISCV::X2); 175 176 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 177 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 178 179 // TODO: add all necessary setOperationAction calls. 
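  // A brief reminder of what the actions below mean: Legal leaves the node
  // as-is for instruction selection, Expand has legalization open it up into
  // other legal nodes (e.g. an XLenVT ROTL without Zbb/Zbp becomes two shifts
  // and an OR), and Custom routes the node through LowerOperation /
  // ReplaceNodeResults in this file.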
180 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 181 182 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 183 setOperationAction(ISD::BR_CC, XLenVT, Expand); 184 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 185 186 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 187 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 188 189 setOperationAction(ISD::VASTART, MVT::Other, Custom); 190 setOperationAction(ISD::VAARG, MVT::Other, Expand); 191 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 192 setOperationAction(ISD::VAEND, MVT::Other, Expand); 193 194 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 195 if (!Subtarget.hasStdExtZbb()) { 196 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 197 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 198 } 199 200 if (Subtarget.is64Bit()) { 201 setOperationAction(ISD::ADD, MVT::i32, Custom); 202 setOperationAction(ISD::SUB, MVT::i32, Custom); 203 setOperationAction(ISD::SHL, MVT::i32, Custom); 204 setOperationAction(ISD::SRA, MVT::i32, Custom); 205 setOperationAction(ISD::SRL, MVT::i32, Custom); 206 } 207 208 if (!Subtarget.hasStdExtM()) { 209 setOperationAction(ISD::MUL, XLenVT, Expand); 210 setOperationAction(ISD::MULHS, XLenVT, Expand); 211 setOperationAction(ISD::MULHU, XLenVT, Expand); 212 setOperationAction(ISD::SDIV, XLenVT, Expand); 213 setOperationAction(ISD::UDIV, XLenVT, Expand); 214 setOperationAction(ISD::SREM, XLenVT, Expand); 215 setOperationAction(ISD::UREM, XLenVT, Expand); 216 } 217 218 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 219 setOperationAction(ISD::MUL, MVT::i32, Custom); 220 221 setOperationAction(ISD::SDIV, MVT::i8, Custom); 222 setOperationAction(ISD::UDIV, MVT::i8, Custom); 223 setOperationAction(ISD::UREM, MVT::i8, Custom); 224 setOperationAction(ISD::SDIV, MVT::i16, Custom); 225 setOperationAction(ISD::UDIV, MVT::i16, Custom); 226 setOperationAction(ISD::UREM, MVT::i16, Custom); 227 setOperationAction(ISD::SDIV, MVT::i32, Custom); 228 setOperationAction(ISD::UDIV, MVT::i32, Custom); 229 setOperationAction(ISD::UREM, MVT::i32, Custom); 230 } 231 232 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 233 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 234 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 235 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 236 237 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 238 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 239 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 240 241 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 242 if (Subtarget.is64Bit()) { 243 setOperationAction(ISD::ROTL, MVT::i32, Custom); 244 setOperationAction(ISD::ROTR, MVT::i32, Custom); 245 } 246 } else { 247 setOperationAction(ISD::ROTL, XLenVT, Expand); 248 setOperationAction(ISD::ROTR, XLenVT, Expand); 249 } 250 251 if (Subtarget.hasStdExtZbp()) { 252 // Custom lower bswap/bitreverse so we can convert them to GREVI to enable 253 // more combining. 254 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 255 setOperationAction(ISD::BSWAP, XLenVT, Custom); 256 257 if (Subtarget.is64Bit()) { 258 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 259 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 260 } 261 } else { 262 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 263 // pattern match it directly in isel. 264 setOperationAction(ISD::BSWAP, XLenVT, 265 Subtarget.hasStdExtZbb() ? 
Legal : Expand); 266 } 267 268 if (Subtarget.hasStdExtZbb()) { 269 setOperationAction(ISD::SMIN, XLenVT, Legal); 270 setOperationAction(ISD::SMAX, XLenVT, Legal); 271 setOperationAction(ISD::UMIN, XLenVT, Legal); 272 setOperationAction(ISD::UMAX, XLenVT, Legal); 273 } else { 274 setOperationAction(ISD::CTTZ, XLenVT, Expand); 275 setOperationAction(ISD::CTLZ, XLenVT, Expand); 276 setOperationAction(ISD::CTPOP, XLenVT, Expand); 277 } 278 279 if (Subtarget.hasStdExtZbt()) { 280 setOperationAction(ISD::FSHL, XLenVT, Custom); 281 setOperationAction(ISD::FSHR, XLenVT, Custom); 282 setOperationAction(ISD::SELECT, XLenVT, Legal); 283 284 if (Subtarget.is64Bit()) { 285 setOperationAction(ISD::FSHL, MVT::i32, Custom); 286 setOperationAction(ISD::FSHR, MVT::i32, Custom); 287 } 288 } else { 289 setOperationAction(ISD::SELECT, XLenVT, Custom); 290 } 291 292 ISD::CondCode FPCCToExpand[] = { 293 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 294 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 295 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 296 297 ISD::NodeType FPOpToExpand[] = { 298 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 299 ISD::FP_TO_FP16}; 300 301 if (Subtarget.hasStdExtZfh()) 302 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 303 304 if (Subtarget.hasStdExtZfh()) { 305 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 306 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 307 for (auto CC : FPCCToExpand) 308 setCondCodeAction(CC, MVT::f16, Expand); 309 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 310 setOperationAction(ISD::SELECT, MVT::f16, Custom); 311 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 312 for (auto Op : FPOpToExpand) 313 setOperationAction(Op, MVT::f16, Expand); 314 } 315 316 if (Subtarget.hasStdExtF()) { 317 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 318 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 319 for (auto CC : FPCCToExpand) 320 setCondCodeAction(CC, MVT::f32, Expand); 321 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 322 setOperationAction(ISD::SELECT, MVT::f32, Custom); 323 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 324 for (auto Op : FPOpToExpand) 325 setOperationAction(Op, MVT::f32, Expand); 326 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 327 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 328 } 329 330 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 331 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 332 333 if (Subtarget.hasStdExtD()) { 334 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 335 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 336 for (auto CC : FPCCToExpand) 337 setCondCodeAction(CC, MVT::f64, Expand); 338 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 339 setOperationAction(ISD::SELECT, MVT::f64, Custom); 340 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 341 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 342 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 343 for (auto Op : FPOpToExpand) 344 setOperationAction(Op, MVT::f64, Expand); 345 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 346 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 347 } 348 349 if (Subtarget.is64Bit()) { 350 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 351 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 352 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 353 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 354 } 355 356 setOperationAction(ISD::GlobalAddress, 
XLenVT, Custom); 357 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 358 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 359 setOperationAction(ISD::JumpTable, XLenVT, Custom); 360 361 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 362 363 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 364 // Unfortunately this can't be determined just from the ISA naming string. 365 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 366 Subtarget.is64Bit() ? Legal : Custom); 367 368 setOperationAction(ISD::TRAP, MVT::Other, Legal); 369 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 370 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 371 372 if (Subtarget.hasStdExtA()) { 373 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 374 setMinCmpXchgSizeInBits(32); 375 } else { 376 setMaxAtomicSizeInBitsSupported(0); 377 } 378 379 setBooleanContents(ZeroOrOneBooleanContent); 380 381 if (Subtarget.hasStdExtV()) { 382 setBooleanVectorContents(ZeroOrOneBooleanContent); 383 384 setOperationAction(ISD::VSCALE, XLenVT, Custom); 385 386 // RVV intrinsics may have illegal operands. 387 // We also need to custom legalize vmv.x.s. 388 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 389 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 390 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 391 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 392 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 393 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 394 395 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 396 397 if (Subtarget.is64Bit()) { 398 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 399 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 400 } else { 401 // We must custom-lower certain vXi64 operations on RV32 due to the vector 402 // element type being illegal. 403 setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom); 404 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); 405 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); 406 407 setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); 408 setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); 409 setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); 410 setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); 411 setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); 412 setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); 413 setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); 414 setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); 415 } 416 417 for (MVT VT : BoolVecVTs) { 418 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 419 420 // Mask VTs are custom-expanded into a series of standard nodes 421 setOperationAction(ISD::TRUNCATE, VT, Custom); 422 } 423 424 for (MVT VT : IntVecVTs) { 425 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 426 427 setOperationAction(ISD::SMIN, VT, Legal); 428 setOperationAction(ISD::SMAX, VT, Legal); 429 setOperationAction(ISD::UMIN, VT, Legal); 430 setOperationAction(ISD::UMAX, VT, Legal); 431 432 setOperationAction(ISD::ROTL, VT, Expand); 433 setOperationAction(ISD::ROTR, VT, Expand); 434 435 // Custom-lower extensions and truncations from/to mask types. 
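      // For example, (nxv4i32 (sign_extend nxv4i1 %m)) is lowered by
      // lowerVectorMaskExt into a vselect between splats of -1 and 0, which
      // maps naturally onto a vmv.v.i + vmerge.vim sequence.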
436 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 437 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 438 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 439 440 // RVV has native int->float & float->int conversions where the 441 // element type sizes are within one power-of-two of each other. Any 442 // wider distances between type sizes have to be lowered as sequences 443 // which progressively narrow the gap in stages. 444 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 445 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 446 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 447 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 448 449 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR" 450 // nodes which truncate by one power of two at a time. 451 setOperationAction(ISD::TRUNCATE, VT, Custom); 452 453 // Custom-lower insert/extract operations to simplify patterns. 454 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 455 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 456 457 // Custom-lower reduction operations to set up the corresponding custom 458 // nodes' operands. 459 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 460 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 461 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 462 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 463 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); 464 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); 465 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); 466 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); 467 } 468 469 // Expand various CCs to best match the RVV ISA, which natively supports UNE 470 // but no other unordered comparisons, and supports all ordered comparisons 471 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 472 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 473 // and we pattern-match those back to the "original", swapping operands once 474 // more. This way we catch both operations and both "vf" and "fv" forms with 475 // fewer patterns. 476 ISD::CondCode VFPCCToExpand[] = { 477 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 478 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 479 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 480 }; 481 482 // Sets common operation actions on RVV floating-point vector types. 483 const auto SetCommonVFPActions = [&](MVT VT) { 484 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 485 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 486 // sizes are within one power-of-two of each other. Therefore conversions 487 // between vXf16 and vXf64 must be lowered as sequences which convert via 488 // vXf32. 489 setOperationAction(ISD::FP_ROUND, VT, Custom); 490 setOperationAction(ISD::FP_EXTEND, VT, Custom); 491 // Custom-lower insert/extract operations to simplify patterns. 492 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 493 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 494 // Expand various condition codes (explained above). 
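      // For example, (setcc %a, %b, setogt) is turned into (setcc %b, %a,
      // setolt), which the isel patterns then match as vmflt.vv (or, when one
      // operand is a splatted scalar, as vmfgt.vf with the operands swapped
      // back again).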
495 for (auto CC : VFPCCToExpand) 496 setCondCodeAction(CC, VT, Expand); 497 498 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); 499 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); 500 }; 501 502 if (Subtarget.hasStdExtZfh()) 503 for (MVT VT : F16VecVTs) 504 SetCommonVFPActions(VT); 505 506 if (Subtarget.hasStdExtF()) 507 for (MVT VT : F32VecVTs) 508 SetCommonVFPActions(VT); 509 510 if (Subtarget.hasStdExtD()) 511 for (MVT VT : F64VecVTs) 512 SetCommonVFPActions(VT); 513 514 if (Subtarget.useRVVForFixedLengthVectors()) { 515 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 516 if (!useRVVForFixedLengthVectorVT(VT)) 517 continue; 518 519 // By default everything must be expanded. 520 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 521 setOperationAction(Op, VT, Expand); 522 523 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 524 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 525 526 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 527 528 setOperationAction(ISD::LOAD, VT, Custom); 529 setOperationAction(ISD::STORE, VT, Custom); 530 setOperationAction(ISD::ADD, VT, Custom); 531 setOperationAction(ISD::MUL, VT, Custom); 532 setOperationAction(ISD::SUB, VT, Custom); 533 setOperationAction(ISD::AND, VT, Custom); 534 setOperationAction(ISD::OR, VT, Custom); 535 setOperationAction(ISD::XOR, VT, Custom); 536 setOperationAction(ISD::SDIV, VT, Custom); 537 setOperationAction(ISD::SREM, VT, Custom); 538 setOperationAction(ISD::UDIV, VT, Custom); 539 setOperationAction(ISD::UREM, VT, Custom); 540 setOperationAction(ISD::SHL, VT, Custom); 541 setOperationAction(ISD::SRA, VT, Custom); 542 setOperationAction(ISD::SRL, VT, Custom); 543 } 544 545 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 546 if (!useRVVForFixedLengthVectorVT(VT)) 547 continue; 548 549 // By default everything must be expanded. 550 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 551 setOperationAction(Op, VT, Expand); 552 553 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 554 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 555 556 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 557 558 setOperationAction(ISD::LOAD, VT, Custom); 559 setOperationAction(ISD::STORE, VT, Custom); 560 setOperationAction(ISD::FADD, VT, Custom); 561 setOperationAction(ISD::FSUB, VT, Custom); 562 setOperationAction(ISD::FMUL, VT, Custom); 563 setOperationAction(ISD::FDIV, VT, Custom); 564 setOperationAction(ISD::FNEG, VT, Custom); 565 setOperationAction(ISD::FMA, VT, Custom); 566 } 567 } 568 } 569 570 // Function alignments. 571 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); 572 setMinFunctionAlignment(FunctionAlignment); 573 setPrefFunctionAlignment(FunctionAlignment); 574 575 setMinimumJumpTableEntries(5); 576 577 // Jumps are expensive, compared to logic 578 setJumpIsExpensive(); 579 580 // We can use any register for comparisons 581 setHasMultipleConditionRegisters(); 582 583 setTargetDAGCombine(ISD::SETCC); 584 if (Subtarget.hasStdExtZbp()) { 585 setTargetDAGCombine(ISD::OR); 586 } 587 } 588 589 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 590 LLVMContext &Context, 591 EVT VT) const { 592 if (!VT.isVector()) 593 return getPointerTy(DL); 594 if (Subtarget.hasStdExtV() && VT.isScalableVector()) 595 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 596 return VT.changeVectorElementTypeToInteger(); 597 } 598 599 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 600 const CallInst &I, 601 MachineFunction &MF, 602 unsigned Intrinsic) const { 603 switch (Intrinsic) { 604 default: 605 return false; 606 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 607 case Intrinsic::riscv_masked_atomicrmw_add_i32: 608 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 609 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 610 case Intrinsic::riscv_masked_atomicrmw_max_i32: 611 case Intrinsic::riscv_masked_atomicrmw_min_i32: 612 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 613 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 614 case Intrinsic::riscv_masked_cmpxchg_i32: 615 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 616 Info.opc = ISD::INTRINSIC_W_CHAIN; 617 Info.memVT = MVT::getVT(PtrTy->getElementType()); 618 Info.ptrVal = I.getArgOperand(0); 619 Info.offset = 0; 620 Info.align = Align(4); 621 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 622 MachineMemOperand::MOVolatile; 623 return true; 624 } 625 } 626 627 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 628 const AddrMode &AM, Type *Ty, 629 unsigned AS, 630 Instruction *I) const { 631 // No global is ever allowed as a base. 632 if (AM.BaseGV) 633 return false; 634 635 // Require a 12-bit signed offset. 636 if (!isInt<12>(AM.BaseOffs)) 637 return false; 638 639 switch (AM.Scale) { 640 case 0: // "r+i" or just "i", depending on HasBaseReg. 641 break; 642 case 1: 643 if (!AM.HasBaseReg) // allow "r+i". 644 break; 645 return false; // disallow "r+r" or "r+r+i". 646 default: 647 return false; 648 } 649 650 return true; 651 } 652 653 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 654 return isInt<12>(Imm); 655 } 656 657 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 658 return isInt<12>(Imm); 659 } 660 661 // On RV32, 64-bit integers are split into their high and low parts and held 662 // in two different registers, so the trunc is free since the low register can 663 // just be used. 
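// For example, on RV32 (i32 (trunc i64 %x)) needs no instructions at all:
// the GPR already holding the low half of %x is simply reused.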
664 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 665 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 666 return false; 667 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 668 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 669 return (SrcBits == 64 && DestBits == 32); 670 } 671 672 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 673 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 674 !SrcVT.isInteger() || !DstVT.isInteger()) 675 return false; 676 unsigned SrcBits = SrcVT.getSizeInBits(); 677 unsigned DestBits = DstVT.getSizeInBits(); 678 return (SrcBits == 64 && DestBits == 32); 679 } 680 681 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 682 // Zexts are free if they can be combined with a load. 683 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 684 EVT MemVT = LD->getMemoryVT(); 685 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 686 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 687 (LD->getExtensionType() == ISD::NON_EXTLOAD || 688 LD->getExtensionType() == ISD::ZEXTLOAD)) 689 return true; 690 } 691 692 return TargetLowering::isZExtFree(Val, VT2); 693 } 694 695 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 696 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 697 } 698 699 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 700 return Subtarget.hasStdExtZbb(); 701 } 702 703 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 704 return Subtarget.hasStdExtZbb(); 705 } 706 707 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 708 bool ForCodeSize) const { 709 if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) 710 return false; 711 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 712 return false; 713 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 714 return false; 715 if (Imm.isNegZero()) 716 return false; 717 return Imm.isZero(); 718 } 719 720 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 721 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 722 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 723 (VT == MVT::f64 && Subtarget.hasStdExtD()); 724 } 725 726 // Changes the condition code and swaps operands if necessary, so the SetCC 727 // operation matches one of the comparisons supported directly in the RISC-V 728 // ISA. 729 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 730 switch (CC) { 731 default: 732 break; 733 case ISD::SETGT: 734 case ISD::SETLE: 735 case ISD::SETUGT: 736 case ISD::SETULE: 737 CC = ISD::getSetCCSwappedOperands(CC); 738 std::swap(LHS, RHS); 739 break; 740 } 741 } 742 743 // Return the RISC-V branch opcode that matches the given DAG integer 744 // condition code. The CondCode must be one of those supported by the RISC-V 745 // ISA (see normaliseSetCC). 746 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 747 switch (CC) { 748 default: 749 llvm_unreachable("Unsupported CondCode"); 750 case ISD::SETEQ: 751 return RISCV::BEQ; 752 case ISD::SETNE: 753 return RISCV::BNE; 754 case ISD::SETLT: 755 return RISCV::BLT; 756 case ISD::SETGE: 757 return RISCV::BGE; 758 case ISD::SETULT: 759 return RISCV::BLTU; 760 case ISD::SETUGE: 761 return RISCV::BGEU; 762 } 763 } 764 765 // Return the largest legal scalable vector type that matches VT's element type. 
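// For example, assuming a 128-bit minimum VLEN, a fixed-length v4i32 is
// assigned LMUL=1 and is operated on inside an nxv2i32 container; wider fixed
// vectors scale up through the LMUL=2/4/8 register groups instead.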
766 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 767 const RISCVSubtarget &Subtarget) { 768 assert(VT.isFixedLengthVector() && 769 DAG.getTargetLoweringInfo().isTypeLegal(VT) && 770 "Expected legal fixed length vector!"); 771 772 unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); 773 assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!"); 774 775 switch (VT.getVectorElementType().SimpleTy) { 776 default: 777 llvm_unreachable("unexpected element type for RVV container"); 778 case MVT::i8: 779 return MVT::getScalableVectorVT(MVT::i8, LMul * 8); 780 case MVT::i16: 781 return MVT::getScalableVectorVT(MVT::i16, LMul * 4); 782 case MVT::i32: 783 return MVT::getScalableVectorVT(MVT::i32, LMul * 2); 784 case MVT::i64: 785 return MVT::getScalableVectorVT(MVT::i64, LMul); 786 case MVT::f16: 787 return MVT::getScalableVectorVT(MVT::f16, LMul * 4); 788 case MVT::f32: 789 return MVT::getScalableVectorVT(MVT::f32, LMul * 2); 790 case MVT::f64: 791 return MVT::getScalableVectorVT(MVT::f64, LMul); 792 } 793 } 794 795 // Grow V to consume an entire RVV register. 796 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 797 const RISCVSubtarget &Subtarget) { 798 assert(VT.isScalableVector() && 799 "Expected to convert into a scalable vector!"); 800 assert(V.getValueType().isFixedLengthVector() && 801 "Expected a fixed length vector operand!"); 802 SDLoc DL(V); 803 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 804 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 805 } 806 807 // Shrink V so it's just big enough to maintain a VT's worth of data. 808 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 809 const RISCVSubtarget &Subtarget) { 810 assert(VT.isFixedLengthVector() && 811 "Expected to convert into a fixed length vector!"); 812 assert(V.getValueType().isScalableVector() && 813 "Expected a scalable vector operand!"); 814 SDLoc DL(V); 815 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 816 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 817 } 818 819 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 820 const RISCVSubtarget &Subtarget) { 821 MVT VT = Op.getSimpleValueType(); 822 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 823 824 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 825 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 826 827 SDLoc DL(Op); 828 SDValue VL = 829 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 830 831 unsigned Opc = VT.isFloatingPoint() ? 
RISCVISD::VFMV_V_F_VL 832 : RISCVISD::VMV_V_X_VL; 833 Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); 834 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 835 } 836 837 return SDValue(); 838 } 839 840 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 841 SelectionDAG &DAG) const { 842 switch (Op.getOpcode()) { 843 default: 844 report_fatal_error("unimplemented operand"); 845 case ISD::GlobalAddress: 846 return lowerGlobalAddress(Op, DAG); 847 case ISD::BlockAddress: 848 return lowerBlockAddress(Op, DAG); 849 case ISD::ConstantPool: 850 return lowerConstantPool(Op, DAG); 851 case ISD::JumpTable: 852 return lowerJumpTable(Op, DAG); 853 case ISD::GlobalTLSAddress: 854 return lowerGlobalTLSAddress(Op, DAG); 855 case ISD::SELECT: 856 return lowerSELECT(Op, DAG); 857 case ISD::VASTART: 858 return lowerVASTART(Op, DAG); 859 case ISD::FRAMEADDR: 860 return lowerFRAMEADDR(Op, DAG); 861 case ISD::RETURNADDR: 862 return lowerRETURNADDR(Op, DAG); 863 case ISD::SHL_PARTS: 864 return lowerShiftLeftParts(Op, DAG); 865 case ISD::SRA_PARTS: 866 return lowerShiftRightParts(Op, DAG, true); 867 case ISD::SRL_PARTS: 868 return lowerShiftRightParts(Op, DAG, false); 869 case ISD::BITCAST: { 870 assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || 871 Subtarget.hasStdExtZfh()) && 872 "Unexpected custom legalisation"); 873 SDLoc DL(Op); 874 SDValue Op0 = Op.getOperand(0); 875 if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { 876 if (Op0.getValueType() != MVT::i16) 877 return SDValue(); 878 SDValue NewOp0 = 879 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0); 880 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 881 return FPConv; 882 } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && 883 Subtarget.hasStdExtF()) { 884 if (Op0.getValueType() != MVT::i32) 885 return SDValue(); 886 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 887 SDValue FPConv = 888 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 889 return FPConv; 890 } 891 return SDValue(); 892 } 893 case ISD::INTRINSIC_WO_CHAIN: 894 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 895 case ISD::INTRINSIC_W_CHAIN: 896 return LowerINTRINSIC_W_CHAIN(Op, DAG); 897 case ISD::BSWAP: 898 case ISD::BITREVERSE: { 899 // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining. 900 assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 901 MVT VT = Op.getSimpleValueType(); 902 SDLoc DL(Op); 903 // Start with the maximum immediate value which is the bitwidth - 1. 904 unsigned Imm = VT.getSizeInBits() - 1; 905 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 906 if (Op.getOpcode() == ISD::BSWAP) 907 Imm &= ~0x7U; 908 return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0), 909 DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT())); 910 } 911 case ISD::FSHL: 912 case ISD::FSHR: { 913 MVT VT = Op.getSimpleValueType(); 914 assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization"); 915 SDLoc DL(Op); 916 // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only 917 // use log(XLen) bits. Mask the shift amount accordingly. 918 unsigned ShAmtWidth = Subtarget.getXLen() - 1; 919 SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2), 920 DAG.getConstant(ShAmtWidth, DL, VT)); 921 unsigned Opc = Op.getOpcode() == ISD::FSHL ? 
RISCVISD::FSL : RISCVISD::FSR; 922 return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt); 923 } 924 case ISD::TRUNCATE: { 925 SDLoc DL(Op); 926 EVT VT = Op.getValueType(); 927 // Only custom-lower vector truncates 928 if (!VT.isVector()) 929 return Op; 930 931 // Truncates to mask types are handled differently 932 if (VT.getVectorElementType() == MVT::i1) 933 return lowerVectorMaskTrunc(Op, DAG); 934 935 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 936 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which 937 // truncate by one power of two at a time. 938 EVT DstEltVT = VT.getVectorElementType(); 939 940 SDValue Src = Op.getOperand(0); 941 EVT SrcVT = Src.getValueType(); 942 EVT SrcEltVT = SrcVT.getVectorElementType(); 943 944 assert(DstEltVT.bitsLT(SrcEltVT) && 945 isPowerOf2_64(DstEltVT.getSizeInBits()) && 946 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 947 "Unexpected vector truncate lowering"); 948 949 SDValue Result = Src; 950 LLVMContext &Context = *DAG.getContext(); 951 const ElementCount Count = SrcVT.getVectorElementCount(); 952 do { 953 SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2); 954 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 955 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result); 956 } while (SrcEltVT != DstEltVT); 957 958 return Result; 959 } 960 case ISD::ANY_EXTEND: 961 case ISD::ZERO_EXTEND: 962 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 963 case ISD::SIGN_EXTEND: 964 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 965 case ISD::SPLAT_VECTOR: 966 return lowerSPLATVECTOR(Op, DAG); 967 case ISD::INSERT_VECTOR_ELT: 968 return lowerINSERT_VECTOR_ELT(Op, DAG); 969 case ISD::EXTRACT_VECTOR_ELT: 970 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 971 case ISD::VSCALE: { 972 MVT VT = Op.getSimpleValueType(); 973 SDLoc DL(Op); 974 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 975 // We define our scalable vector types for lmul=1 to use a 64 bit known 976 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 977 // vscale as VLENB / 8. 978 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 979 DAG.getConstant(3, DL, VT)); 980 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 981 } 982 case ISD::FP_EXTEND: { 983 // RVV can only do fp_extend to types double the size as the source. We 984 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 985 // via f32. 986 MVT VT = Op.getSimpleValueType(); 987 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 988 // We only need to close the gap between vXf16->vXf64. 989 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 990 SrcVT.getVectorElementType() != MVT::f16) 991 return Op; 992 SDLoc DL(Op); 993 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 994 SDValue IntermediateRound = 995 DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT); 996 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 997 } 998 case ISD::FP_ROUND: { 999 // RVV can only do fp_round to types half the size as the source. We 1000 // custom-lower f64->f16 rounds via RVV's round-to-odd float 1001 // conversion instruction. 1002 MVT VT = Op.getSimpleValueType(); 1003 MVT SrcVT = Op.getOperand(0).getSimpleValueType(); 1004 // We only need to close the gap between vXf64<->vXf16. 
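    // For example, (nxv1f16 (fp_round nxv1f64 %x)) becomes a round-to-odd
    // narrowing conversion to nxv1f32 (vfncvt.rod.f.f.w) followed by an
    // ordinary fp_round to nxv1f16, so the intermediate f32 step cannot
    // introduce a double-rounding error.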
1005 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 1006 SrcVT.getVectorElementType() != MVT::f64) 1007 return Op; 1008 SDLoc DL(Op); 1009 MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1010 SDValue IntermediateRound = 1011 DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0)); 1012 return DAG.getFPExtendOrRound(IntermediateRound, DL, VT); 1013 } 1014 case ISD::FP_TO_SINT: 1015 case ISD::FP_TO_UINT: 1016 case ISD::SINT_TO_FP: 1017 case ISD::UINT_TO_FP: { 1018 // RVV can only do fp<->int conversions to types half/double the size as 1019 // the source. We custom-lower any conversions that do two hops into 1020 // sequences. 1021 MVT VT = Op.getSimpleValueType(); 1022 if (!VT.isVector()) 1023 return Op; 1024 SDLoc DL(Op); 1025 SDValue Src = Op.getOperand(0); 1026 MVT EltVT = VT.getVectorElementType(); 1027 MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType(); 1028 unsigned EltSize = EltVT.getSizeInBits(); 1029 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 1030 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 1031 "Unexpected vector element types"); 1032 bool IsInt2FP = SrcEltVT.isInteger(); 1033 // Widening conversions 1034 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 1035 if (IsInt2FP) { 1036 // Do a regular integer sign/zero extension then convert to float. 1037 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 1038 VT.getVectorElementCount()); 1039 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 1040 ? ISD::ZERO_EXTEND 1041 : ISD::SIGN_EXTEND; 1042 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 1043 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 1044 } 1045 // FP2Int 1046 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 1047 // Do one doubling fp_extend then complete the operation by converting 1048 // to int. 1049 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1050 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 1051 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 1052 } 1053 1054 // Narrowing conversions 1055 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 1056 if (IsInt2FP) { 1057 // One narrowing int_to_fp, then an fp_round. 1058 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 1059 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1060 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 1061 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 1062 } 1063 // FP2Int 1064 // One narrowing fp_to_int, then truncate the integer. If the float isn't 1065 // representable by the integer, the result is poison. 
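      // For example, (nxv2i8 (fp_to_sint nxv2f32 %x)) becomes a single
      // narrowing fp_to_sint to nxv2i16 followed by a vector truncate down
      // to nxv2i8.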
1066 MVT IVecVT = 1067 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 1068 VT.getVectorElementCount()); 1069 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 1070 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 1071 } 1072 1073 return Op; 1074 } 1075 case ISD::VECREDUCE_ADD: 1076 case ISD::VECREDUCE_UMAX: 1077 case ISD::VECREDUCE_SMAX: 1078 case ISD::VECREDUCE_UMIN: 1079 case ISD::VECREDUCE_SMIN: 1080 case ISD::VECREDUCE_AND: 1081 case ISD::VECREDUCE_OR: 1082 case ISD::VECREDUCE_XOR: 1083 return lowerVECREDUCE(Op, DAG); 1084 case ISD::VECREDUCE_FADD: 1085 case ISD::VECREDUCE_SEQ_FADD: 1086 return lowerFPVECREDUCE(Op, DAG); 1087 case ISD::BUILD_VECTOR: 1088 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 1089 case ISD::LOAD: 1090 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 1091 case ISD::STORE: 1092 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 1093 case ISD::ADD: 1094 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 1095 case ISD::SUB: 1096 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 1097 case ISD::MUL: 1098 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 1099 case ISD::AND: 1100 return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL); 1101 case ISD::OR: 1102 return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL); 1103 case ISD::XOR: 1104 return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL); 1105 case ISD::SDIV: 1106 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 1107 case ISD::SREM: 1108 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 1109 case ISD::UDIV: 1110 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 1111 case ISD::UREM: 1112 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 1113 case ISD::SHL: 1114 return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); 1115 case ISD::SRA: 1116 return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); 1117 case ISD::SRL: 1118 return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); 1119 case ISD::FADD: 1120 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 1121 case ISD::FSUB: 1122 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 1123 case ISD::FMUL: 1124 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 1125 case ISD::FDIV: 1126 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 1127 case ISD::FNEG: 1128 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 1129 case ISD::FMA: 1130 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 1131 } 1132 } 1133 1134 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1135 SelectionDAG &DAG, unsigned Flags) { 1136 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1137 } 1138 1139 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1140 SelectionDAG &DAG, unsigned Flags) { 1141 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1142 Flags); 1143 } 1144 1145 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1146 SelectionDAG &DAG, unsigned Flags) { 1147 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1148 N->getOffset(), Flags); 1149 } 1150 1151 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1152 SelectionDAG &DAG, unsigned Flags) { 1153 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1154 } 1155 1156 template <class NodeTy> 1157 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1158 bool IsLocal) const { 1159 SDLoc DL(N); 1160 EVT Ty = getPointerTy(DAG.getDataLayout()); 1161 1162 if (isPositionIndependent()) { 1163 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 
1164 if (IsLocal) 1165 // Use PC-relative addressing to access the symbol. This generates the 1166 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 1167 // %pcrel_lo(auipc)). 1168 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1169 1170 // Use PC-relative addressing to access the GOT for this symbol, then load 1171 // the address from the GOT. This generates the pattern (PseudoLA sym), 1172 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 1173 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 1174 } 1175 1176 switch (getTargetMachine().getCodeModel()) { 1177 default: 1178 report_fatal_error("Unsupported code model for lowering"); 1179 case CodeModel::Small: { 1180 // Generate a sequence for accessing addresses within the first 2 GiB of 1181 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 1182 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 1183 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 1184 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1185 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 1186 } 1187 case CodeModel::Medium: { 1188 // Generate a sequence for accessing addresses within any 2GiB range within 1189 // the address space. This generates the pattern (PseudoLLA sym), which 1190 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 1191 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1192 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 1193 } 1194 } 1195 } 1196 1197 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 1198 SelectionDAG &DAG) const { 1199 SDLoc DL(Op); 1200 EVT Ty = Op.getValueType(); 1201 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1202 int64_t Offset = N->getOffset(); 1203 MVT XLenVT = Subtarget.getXLenVT(); 1204 1205 const GlobalValue *GV = N->getGlobal(); 1206 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 1207 SDValue Addr = getAddr(N, DAG, IsLocal); 1208 1209 // In order to maximise the opportunity for common subexpression elimination, 1210 // emit a separate ADD node for the global address offset instead of folding 1211 // it in the global address node. Later peephole optimisations may choose to 1212 // fold it back in when profitable. 
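  // For example, accesses to both sym and sym+8 can then share a single
  // materialisation of sym's address, with the +8 folded back into the
  // load/store immediate later if that turns out to be cheaper.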
1213 if (Offset != 0) 1214 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1215 DAG.getConstant(Offset, DL, XLenVT)); 1216 return Addr; 1217 } 1218 1219 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 1220 SelectionDAG &DAG) const { 1221 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 1222 1223 return getAddr(N, DAG); 1224 } 1225 1226 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 1227 SelectionDAG &DAG) const { 1228 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 1229 1230 return getAddr(N, DAG); 1231 } 1232 1233 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 1234 SelectionDAG &DAG) const { 1235 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 1236 1237 return getAddr(N, DAG); 1238 } 1239 1240 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1241 SelectionDAG &DAG, 1242 bool UseGOT) const { 1243 SDLoc DL(N); 1244 EVT Ty = getPointerTy(DAG.getDataLayout()); 1245 const GlobalValue *GV = N->getGlobal(); 1246 MVT XLenVT = Subtarget.getXLenVT(); 1247 1248 if (UseGOT) { 1249 // Use PC-relative addressing to access the GOT for this TLS symbol, then 1250 // load the address from the GOT and add the thread pointer. This generates 1251 // the pattern (PseudoLA_TLS_IE sym), which expands to 1252 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 1253 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1254 SDValue Load = 1255 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 1256 1257 // Add the thread pointer. 1258 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1259 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 1260 } 1261 1262 // Generate a sequence for accessing the address relative to the thread 1263 // pointer, with the appropriate adjustment for the thread pointer offset. 1264 // This generates the pattern 1265 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 1266 SDValue AddrHi = 1267 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 1268 SDValue AddrAdd = 1269 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 1270 SDValue AddrLo = 1271 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 1272 1273 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 1274 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 1275 SDValue MNAdd = SDValue( 1276 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 1277 0); 1278 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 1279 } 1280 1281 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1282 SelectionDAG &DAG) const { 1283 SDLoc DL(N); 1284 EVT Ty = getPointerTy(DAG.getDataLayout()); 1285 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1286 const GlobalValue *GV = N->getGlobal(); 1287 1288 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1289 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 1290 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 1291 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1292 SDValue Load = 1293 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 1294 1295 // Prepare argument list to generate call. 1296 ArgListTy Args; 1297 ArgListEntry Entry; 1298 Entry.Node = Load; 1299 Entry.Ty = CallTy; 1300 Args.push_back(Entry); 1301 1302 // Setup call to __tls_get_addr. 
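  // The emitted general-dynamic sequence is roughly:
  //   la.tls.gd a0, sym      # auipc+addi using %tls_gd_pcrel_hi/%pcrel_lo
  //   call __tls_get_addr@plt
  // with the address of the thread-local variable returned in a0.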
1303 TargetLowering::CallLoweringInfo CLI(DAG); 1304 CLI.setDebugLoc(DL) 1305 .setChain(DAG.getEntryNode()) 1306 .setLibCallee(CallingConv::C, CallTy, 1307 DAG.getExternalSymbol("__tls_get_addr", Ty), 1308 std::move(Args)); 1309 1310 return LowerCallTo(CLI).first; 1311 } 1312 1313 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1314 SelectionDAG &DAG) const { 1315 SDLoc DL(Op); 1316 EVT Ty = Op.getValueType(); 1317 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1318 int64_t Offset = N->getOffset(); 1319 MVT XLenVT = Subtarget.getXLenVT(); 1320 1321 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 1322 1323 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1324 CallingConv::GHC) 1325 report_fatal_error("In GHC calling convention TLS is not supported"); 1326 1327 SDValue Addr; 1328 switch (Model) { 1329 case TLSModel::LocalExec: 1330 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 1331 break; 1332 case TLSModel::InitialExec: 1333 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 1334 break; 1335 case TLSModel::LocalDynamic: 1336 case TLSModel::GeneralDynamic: 1337 Addr = getDynamicTLSAddr(N, DAG); 1338 break; 1339 } 1340 1341 // In order to maximise the opportunity for common subexpression elimination, 1342 // emit a separate ADD node for the global address offset instead of folding 1343 // it in the global address node. Later peephole optimisations may choose to 1344 // fold it back in when profitable. 1345 if (Offset != 0) 1346 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 1347 DAG.getConstant(Offset, DL, XLenVT)); 1348 return Addr; 1349 } 1350 1351 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 1352 SDValue CondV = Op.getOperand(0); 1353 SDValue TrueV = Op.getOperand(1); 1354 SDValue FalseV = Op.getOperand(2); 1355 SDLoc DL(Op); 1356 MVT XLenVT = Subtarget.getXLenVT(); 1357 1358 // If the result type is XLenVT and CondV is the output of a SETCC node 1359 // which also operated on XLenVT inputs, then merge the SETCC node into the 1360 // lowered RISCVISD::SELECT_CC to take advantage of the integer 1361 // compare+branch instructions. 
i.e.: 1362 // (select (setcc lhs, rhs, cc), truev, falsev) 1363 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 1364 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 1365 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 1366 SDValue LHS = CondV.getOperand(0); 1367 SDValue RHS = CondV.getOperand(1); 1368 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 1369 ISD::CondCode CCVal = CC->get(); 1370 1371 normaliseSetCC(LHS, RHS, CCVal); 1372 1373 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 1374 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 1375 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1376 } 1377 1378 // Otherwise: 1379 // (select condv, truev, falsev) 1380 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 1381 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 1382 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 1383 1384 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 1385 1386 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 1387 } 1388 1389 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 1390 MachineFunction &MF = DAG.getMachineFunction(); 1391 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 1392 1393 SDLoc DL(Op); 1394 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1395 getPointerTy(MF.getDataLayout())); 1396 1397 // vastart just stores the address of the VarArgsFrameIndex slot into the 1398 // memory location argument. 1399 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1400 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1401 MachinePointerInfo(SV)); 1402 } 1403 1404 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 1405 SelectionDAG &DAG) const { 1406 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1407 MachineFunction &MF = DAG.getMachineFunction(); 1408 MachineFrameInfo &MFI = MF.getFrameInfo(); 1409 MFI.setFrameAddressIsTaken(true); 1410 Register FrameReg = RI.getFrameRegister(MF); 1411 int XLenInBytes = Subtarget.getXLen() / 8; 1412 1413 EVT VT = Op.getValueType(); 1414 SDLoc DL(Op); 1415 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1416 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1417 while (Depth--) { 1418 int Offset = -(XLenInBytes * 2); 1419 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1420 DAG.getIntPtrConstant(Offset, DL)); 1421 FrameAddr = 1422 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1423 } 1424 return FrameAddr; 1425 } 1426 1427 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 1428 SelectionDAG &DAG) const { 1429 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 1430 MachineFunction &MF = DAG.getMachineFunction(); 1431 MachineFrameInfo &MFI = MF.getFrameInfo(); 1432 MFI.setReturnAddressIsTaken(true); 1433 MVT XLenVT = Subtarget.getXLenVT(); 1434 int XLenInBytes = Subtarget.getXLen() / 8; 1435 1436 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1437 return SDValue(); 1438 1439 EVT VT = Op.getValueType(); 1440 SDLoc DL(Op); 1441 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1442 if (Depth) { 1443 int Off = -XLenInBytes; 1444 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 1445 SDValue Offset = DAG.getConstant(Off, DL, VT); 1446 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 1447 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 1448 MachinePointerInfo()); 1449 } 1450 
1451 // Return the value of the return address register, marking it an implicit 1452 // live-in. 1453 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1454 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1455 } 1456 1457 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1458 SelectionDAG &DAG) const { 1459 SDLoc DL(Op); 1460 SDValue Lo = Op.getOperand(0); 1461 SDValue Hi = Op.getOperand(1); 1462 SDValue Shamt = Op.getOperand(2); 1463 EVT VT = Lo.getValueType(); 1464 1465 // if Shamt-XLEN < 0: // Shamt < XLEN 1466 // Lo = Lo << Shamt 1467 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1468 // else: 1469 // Lo = 0 1470 // Hi = Lo << (Shamt-XLEN) 1471 1472 SDValue Zero = DAG.getConstant(0, DL, VT); 1473 SDValue One = DAG.getConstant(1, DL, VT); 1474 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1475 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1476 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1477 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1478 1479 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1480 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1481 SDValue ShiftRightLo = 1482 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1483 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1484 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1485 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1486 1487 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1488 1489 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1490 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1491 1492 SDValue Parts[2] = {Lo, Hi}; 1493 return DAG.getMergeValues(Parts, DL); 1494 } 1495 1496 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1497 bool IsSRA) const { 1498 SDLoc DL(Op); 1499 SDValue Lo = Op.getOperand(0); 1500 SDValue Hi = Op.getOperand(1); 1501 SDValue Shamt = Op.getOperand(2); 1502 EVT VT = Lo.getValueType(); 1503 1504 // SRA expansion: 1505 // if Shamt-XLEN < 0: // Shamt < XLEN 1506 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1507 // Hi = Hi >>s Shamt 1508 // else: 1509 // Lo = Hi >>s (Shamt-XLEN); 1510 // Hi = Hi >>s (XLEN-1) 1511 // 1512 // SRL expansion: 1513 // if Shamt-XLEN < 0: // Shamt < XLEN 1514 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1515 // Hi = Hi >>u Shamt 1516 // else: 1517 // Lo = Hi >>u (Shamt-XLEN); 1518 // Hi = 0; 1519 1520 unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; 1521 1522 SDValue Zero = DAG.getConstant(0, DL, VT); 1523 SDValue One = DAG.getConstant(1, DL, VT); 1524 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1525 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1526 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1527 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1528 1529 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1530 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1531 SDValue ShiftLeftHi = 1532 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1533 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1534 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1535 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1536 SDValue HiFalse = 1537 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1538 1539 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1540 1541 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1542 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1543 1544 SDValue Parts[2] = {Lo, Hi}; 1545 return DAG.getMergeValues(Parts, DL); 1546 } 1547 1548 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1549 // illegal (currently only vXi64 RV32). 1550 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1551 // them to SPLAT_VECTOR_I64 1552 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1553 SelectionDAG &DAG) const { 1554 SDLoc DL(Op); 1555 EVT VecVT = Op.getValueType(); 1556 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1557 "Unexpected SPLAT_VECTOR lowering"); 1558 SDValue SplatVal = Op.getOperand(0); 1559 1560 // If we can prove that the value is a sign-extended 32-bit value, lower this 1561 // as a custom node in order to try and match RVV vector/scalar instructions. 
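// For example, a splat of the constant -1 over nxv1i64 is emitted as
// (splat_vector_i64 (i32 -1)), which the vector/scalar patterns can then
// match directly (e.g. as a single vmv.v.i).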
1562 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1563 if (isInt<32>(CVal->getSExtValue())) 1564 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1565 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1566 } 1567 1568 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1569 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1570 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1571 SplatVal.getOperand(0)); 1572 } 1573 1574 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1575 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1576 // vmv.v.x vX, hi 1577 // vsll.vx vX, vX, /*32*/ 1578 // vmv.v.x vY, lo 1579 // vsll.vx vY, vY, /*32*/ 1580 // vsrl.vx vY, vY, /*32*/ 1581 // vor.vv vX, vX, vY 1582 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1583 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1584 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1585 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1586 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1587 1588 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1589 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1590 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1591 1592 if (isNullConstant(Hi)) 1593 return Lo; 1594 1595 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1596 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1597 1598 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1599 } 1600 1601 // Custom-lower extensions from mask vectors by using a vselect either with 1 1602 // for zero/any-extension or -1 for sign-extension: 1603 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1604 // Note that any-extension is lowered identically to zero-extension. 1605 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1606 int64_t ExtTrueVal) const { 1607 SDLoc DL(Op); 1608 EVT VecVT = Op.getValueType(); 1609 SDValue Src = Op.getOperand(0); 1610 // Only custom-lower extensions from mask types 1611 if (!Src.getValueType().isVector() || 1612 Src.getValueType().getVectorElementType() != MVT::i1) 1613 return Op; 1614 1615 // Be careful not to introduce illegal scalar types at this stage, and be 1616 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1617 // illegal and must be expanded. Since we know that the constants are 1618 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
1619 bool IsRV32E64 = 1620 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1621 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1622 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT()); 1623 1624 if (!IsRV32E64) { 1625 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1626 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1627 } else { 1628 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1629 SplatTrueVal = 1630 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1631 } 1632 1633 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1634 } 1635 1636 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1637 // setcc operation: 1638 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1639 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1640 SelectionDAG &DAG) const { 1641 SDLoc DL(Op); 1642 EVT MaskVT = Op.getValueType(); 1643 // Only expect to custom-lower truncations to mask types 1644 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1645 "Unexpected type for vector mask lowering"); 1646 SDValue Src = Op.getOperand(0); 1647 EVT VecVT = Src.getValueType(); 1648 1649 // Be careful not to introduce illegal scalar types at this stage, and be 1650 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1651 // illegal and must be expanded. Since we know that the constants are 1652 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1653 bool IsRV32E64 = 1654 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1655 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1656 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1657 1658 if (!IsRV32E64) { 1659 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1660 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1661 } else { 1662 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1663 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1664 } 1665 1666 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1667 1668 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1669 } 1670 1671 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1672 SelectionDAG &DAG) const { 1673 SDLoc DL(Op); 1674 EVT VecVT = Op.getValueType(); 1675 SDValue Vec = Op.getOperand(0); 1676 SDValue Val = Op.getOperand(1); 1677 SDValue Idx = Op.getOperand(2); 1678 1679 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1680 // first slid down into position, the value is inserted into the first 1681 // position, and the vector is slid back up. We do this to simplify patterns. 1682 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 1683 if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) { 1684 if (isNullConstant(Idx)) 1685 return Op; 1686 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1687 DAG.getUNDEF(VecVT), Vec, Idx); 1688 SDValue InsertElt0 = 1689 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val, 1690 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1691 1692 return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx); 1693 } 1694 1695 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 1696 // is illegal (currently only vXi64 RV32). 
1697 // Since there is no easy way of getting a single element into a vector when
1698 // XLEN<SEW, we lower the operation to the following sequence:
1699 // splat vVal, rVal
1700 // vid.v vVid
1701 // vmseq.vx mMask, vVid, rIdx
1702 // vmerge.vvm vDest, vSrc, vVal, mMask
1703 // This essentially merges the original vector with the inserted element by
1704 // using a mask whose only set bit is that corresponding to the insert
1705 // index.
1706 SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1707 SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1708
1709 SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
1710 auto SetCCVT =
1711 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1712 SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1713
1714 return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
1715 }
1716
1717 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1718 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
1719 // types this is done using VMV_X_S to allow us to glean information about the
1720 // sign bits of the result.
1721 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1722 SelectionDAG &DAG) const {
1723 SDLoc DL(Op);
1724 SDValue Idx = Op.getOperand(1);
1725 SDValue Vec = Op.getOperand(0);
1726 EVT EltVT = Op.getValueType();
1727 EVT VecVT = Vec.getValueType();
1728 MVT XLenVT = Subtarget.getXLenVT();
1729
1730 // If the index is 0, the vector is already in the right position.
1731 if (!isNullConstant(Idx)) {
1732 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec,
1733 Idx);
1734 }
1735
1736 if (!EltVT.isInteger()) {
1737 // Floating-point extracts are handled in TableGen.
1738 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
1739 DAG.getConstant(0, DL, XLenVT));
1740 }
1741
1742 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
1743 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
1744 }
1745
1746 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1747 SelectionDAG &DAG) const {
1748 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1749 SDLoc DL(Op);
1750
1751 if (Subtarget.hasStdExtV()) {
1752 // Some RVV intrinsics may claim that they want an integer operand to be
1753 // extended.
1754 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1755 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1756 if (II->ExtendedOperand) {
1757 assert(II->ExtendedOperand < Op.getNumOperands());
1758 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1759 SDValue &ScalarOp = Operands[II->ExtendedOperand];
1760 EVT OpVT = ScalarOp.getValueType();
1761 if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1762 (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1763 // If the operand is a constant, sign extend to increase our chances
1764 // of being able to use a .vi instruction. ANY_EXTEND would become
1765 // a zero extend and the simm5 check in isel would fail.
1766 // FIXME: Should we ignore the upper bits in isel instead?
1767 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1768 : ISD::ANY_EXTEND;
1769 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1770 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1771 Operands);
1772 }
1773 }
1774 }
1775 }
1776
1777 switch (IntNo) {
1778 default:
1779 return SDValue(); // Don't custom lower most intrinsics.
1780 case Intrinsic::thread_pointer: {
1781 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1782 return DAG.getRegister(RISCV::X4, PtrVT);
1783 }
1784 case Intrinsic::riscv_vmv_x_s:
1785 assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1786 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1787 Op.getOperand(1));
1788 case Intrinsic::riscv_vmv_v_x: {
1789 SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
1790 Op.getOperand(1));
1791 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
1792 Scalar, Op.getOperand(2));
1793 }
1794 case Intrinsic::riscv_vfmv_v_f:
1795 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
1796 Op.getOperand(1), Op.getOperand(2));
1797 }
1798 }
1799
1800 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1801 SelectionDAG &DAG) const {
1802 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1803 SDLoc DL(Op);
1804
1805 if (Subtarget.hasStdExtV()) {
1806 // Some RVV intrinsics may claim that they want an integer operand to be
1807 // extended.
1808 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1809 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1810 if (II->ExtendedOperand) {
1811 // The operands start from the second argument in INTRINSIC_W_CHAIN.
1812 unsigned ExtendOp = II->ExtendedOperand + 1;
1813 assert(ExtendOp < Op.getNumOperands());
1814 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1815 SDValue &ScalarOp = Operands[ExtendOp];
1816 EVT OpVT = ScalarOp.getValueType();
1817 if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1818 (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1819 // If the operand is a constant, sign extend to increase our chances
1820 // of being able to use a .vi instruction. ANY_EXTEND would become
1821 // a zero extend and the simm5 check in isel would fail.
1822 // FIXME: Should we ignore the upper bits in isel instead?
1823 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1824 : ISD::ANY_EXTEND;
1825 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1826 return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1827 Operands);
1828 }
1829 }
1830 }
1831 }
1832
1833 switch (IntNo) {
1834 default:
1835 return SDValue(); // Don't custom lower most intrinsics.
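// The fault-only-first loads below update VL, so in addition to the loaded
// vector the lowering reads the new VL back via PseudoReadVL and returns it
// as the intrinsic's second result.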
1836 case Intrinsic::riscv_vleff: { 1837 SDLoc DL(Op); 1838 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1839 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), 1840 Op.getOperand(2), Op.getOperand(3)); 1841 SDValue ReadVL = 1842 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1843 Load.getValue(2)), 1844 0); 1845 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1846 } 1847 case Intrinsic::riscv_vleff_mask: { 1848 SDLoc DL(Op); 1849 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1850 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), 1851 Op.getOperand(2), Op.getOperand(3), 1852 Op.getOperand(4), Op.getOperand(5)); 1853 SDValue ReadVL = 1854 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1855 Load.getValue(2)), 1856 0); 1857 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1858 } 1859 } 1860 } 1861 1862 static std::pair<unsigned, uint64_t> 1863 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) { 1864 switch (ISDOpcode) { 1865 default: 1866 llvm_unreachable("Unhandled reduction"); 1867 case ISD::VECREDUCE_ADD: 1868 return {RISCVISD::VECREDUCE_ADD, 0}; 1869 case ISD::VECREDUCE_UMAX: 1870 return {RISCVISD::VECREDUCE_UMAX, 0}; 1871 case ISD::VECREDUCE_SMAX: 1872 return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)}; 1873 case ISD::VECREDUCE_UMIN: 1874 return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)}; 1875 case ISD::VECREDUCE_SMIN: 1876 return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)}; 1877 case ISD::VECREDUCE_AND: 1878 return {RISCVISD::VECREDUCE_AND, -1}; 1879 case ISD::VECREDUCE_OR: 1880 return {RISCVISD::VECREDUCE_OR, 0}; 1881 case ISD::VECREDUCE_XOR: 1882 return {RISCVISD::VECREDUCE_XOR, 0}; 1883 } 1884 } 1885 1886 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 1887 // reduction opcode. Note that this returns a vector type, which must be 1888 // further processed to access the scalar result in element 0. 1889 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 1890 SelectionDAG &DAG) const { 1891 SDLoc DL(Op); 1892 assert(Op.getValueType().isSimple() && 1893 Op.getOperand(0).getValueType().isSimple() && 1894 "Unexpected vector-reduce lowering"); 1895 MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 1896 unsigned RVVOpcode; 1897 uint64_t IdentityVal; 1898 std::tie(RVVOpcode, IdentityVal) = 1899 getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits()); 1900 // We have to perform a bit of a dance to get from our vector type to the 1901 // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector 1902 // element type to find the type which fills a single register. Be careful to 1903 // use the operand's vector element type rather than the reduction's value 1904 // type, as that has likely been extended to XLEN. 
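// For example, a reduction whose operand has i32 elements always reduces into
// M1VT = nxv2i32 (64 / 32 = 2 elements), regardless of the LMUL of the input.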
1905 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 1906 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 1907 SDValue IdentitySplat = 1908 DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT)); 1909 SDValue Reduction = 1910 DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat); 1911 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 1912 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1913 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 1914 } 1915 1916 // Given a reduction op, this function returns the matching reduction opcode, 1917 // the vector SDValue and the scalar SDValue required to lower this to a 1918 // RISCVISD node. 1919 static std::tuple<unsigned, SDValue, SDValue> 1920 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 1921 SDLoc DL(Op); 1922 switch (Op.getOpcode()) { 1923 default: 1924 llvm_unreachable("Unhandled reduction"); 1925 case ISD::VECREDUCE_FADD: 1926 return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0), 1927 DAG.getConstantFP(0.0, DL, EltVT)); 1928 case ISD::VECREDUCE_SEQ_FADD: 1929 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), 1930 Op.getOperand(0)); 1931 } 1932 } 1933 1934 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 1935 SelectionDAG &DAG) const { 1936 SDLoc DL(Op); 1937 MVT VecEltVT = Op.getSimpleValueType(); 1938 // We have to perform a bit of a dance to get from our vector type to the 1939 // correct LMUL=1 vector type. See above for an explanation. 1940 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 1941 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 1942 1943 unsigned RVVOpcode; 1944 SDValue VectorVal, ScalarVal; 1945 std::tie(RVVOpcode, VectorVal, ScalarVal) = 1946 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 1947 1948 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 1949 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat); 1950 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 1951 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1952 } 1953 1954 SDValue 1955 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 1956 SelectionDAG &DAG) const { 1957 auto *Load = cast<LoadSDNode>(Op); 1958 1959 SDLoc DL(Op); 1960 MVT VT = Op.getSimpleValueType(); 1961 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 1962 1963 SDValue VL = 1964 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 1965 1966 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 1967 SDValue NewLoad = DAG.getMemIntrinsicNode( 1968 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 1969 Load->getMemoryVT(), Load->getMemOperand()); 1970 1971 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 1972 return DAG.getMergeValues({Result, Load->getChain()}, DL); 1973 } 1974 1975 SDValue 1976 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 1977 SelectionDAG &DAG) const { 1978 auto *Store = cast<StoreSDNode>(Op); 1979 1980 SDLoc DL(Op); 1981 MVT VT = Store->getValue().getSimpleValueType(); 1982 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 1983 1984 SDValue VL = 1985 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 1986 1987 SDValue NewValue = 1988 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 1989 return DAG.getMemIntrinsicNode( 1990 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 1991 
{Store->getChain(), NewValue, Store->getBasePtr(), VL},
1992 Store->getMemoryVT(), Store->getMemOperand());
1993 }
1994
1995 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
1996 unsigned NewOpc) const {
1997 MVT VT = Op.getSimpleValueType();
1998 assert(useRVVForFixedLengthVectorVT(VT) &&
1999 "Only expected to lower fixed length vector operation!");
2000 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2001
2002 // Create list of operands by converting existing ones to scalable types.
2003 SmallVector<SDValue, 6> Ops;
2004 for (const SDValue &V : Op->op_values()) {
2005 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
2006
2007 // Pass through non-vector operands.
2008 if (!V.getValueType().isVector()) {
2009 Ops.push_back(V);
2010 continue;
2011 }
2012
2013 // "cast" fixed length vector to a scalable vector.
2014 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
2015 "Only fixed length vectors are supported!");
2016 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
2017 }
2018
2019 SDLoc DL(Op);
2020 SDValue VL =
2021 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2022 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2023 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2024 Ops.push_back(Mask);
2025 Ops.push_back(VL);
2026
2027 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
2028 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
2029 }
2030
2031 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2032 // form of the given Opcode.
2033 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
2034 switch (Opcode) {
2035 default:
2036 llvm_unreachable("Unexpected opcode");
2037 case ISD::SHL:
2038 return RISCVISD::SLLW;
2039 case ISD::SRA:
2040 return RISCVISD::SRAW;
2041 case ISD::SRL:
2042 return RISCVISD::SRLW;
2043 case ISD::SDIV:
2044 return RISCVISD::DIVW;
2045 case ISD::UDIV:
2046 return RISCVISD::DIVUW;
2047 case ISD::UREM:
2048 return RISCVISD::REMUW;
2049 case ISD::ROTL:
2050 return RISCVISD::ROLW;
2051 case ISD::ROTR:
2052 return RISCVISD::RORW;
2053 case RISCVISD::GREVI:
2054 return RISCVISD::GREVIW;
2055 case RISCVISD::GORCI:
2056 return RISCVISD::GORCIW;
2057 }
2058 }
2059
2060 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
2061 // Because i32 isn't a legal type for RV64, these operations would otherwise
2062 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
2063 // later on because the fact that the operation was originally of type i32 is
2064 // lost.
2065 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
2066 unsigned ExtOpc = ISD::ANY_EXTEND) {
2067 SDLoc DL(N);
2068 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2069 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2070 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2071 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2072 // ReplaceNodeResults requires we maintain the same type for the return value.
2073 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2074 }
2075
2076 // Converts the given 32-bit operation to an i64 operation with sign-extension
2077 // semantics, in order to reduce the number of sign-extension instructions.
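// For example, (i32 (add x, y)) becomes
// (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32)),
// which can then be selected as a single addw.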
2078 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 2079 SDLoc DL(N); 2080 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2081 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2082 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 2083 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 2084 DAG.getValueType(MVT::i32)); 2085 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 2086 } 2087 2088 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 2089 SmallVectorImpl<SDValue> &Results, 2090 SelectionDAG &DAG) const { 2091 SDLoc DL(N); 2092 switch (N->getOpcode()) { 2093 default: 2094 llvm_unreachable("Don't know how to custom type legalize this operation!"); 2095 case ISD::STRICT_FP_TO_SINT: 2096 case ISD::STRICT_FP_TO_UINT: 2097 case ISD::FP_TO_SINT: 2098 case ISD::FP_TO_UINT: { 2099 bool IsStrict = N->isStrictFPOpcode(); 2100 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2101 "Unexpected custom legalisation"); 2102 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 2103 // If the FP type needs to be softened, emit a library call using the 'si' 2104 // version. If we left it to default legalization we'd end up with 'di'. If 2105 // the FP type doesn't need to be softened just let generic type 2106 // legalization promote the result type. 2107 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 2108 TargetLowering::TypeSoftenFloat) 2109 return; 2110 RTLIB::Libcall LC; 2111 if (N->getOpcode() == ISD::FP_TO_SINT || 2112 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 2113 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 2114 else 2115 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 2116 MakeLibCallOptions CallOptions; 2117 EVT OpVT = Op0.getValueType(); 2118 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 2119 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 2120 SDValue Result; 2121 std::tie(Result, Chain) = 2122 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 2123 Results.push_back(Result); 2124 if (IsStrict) 2125 Results.push_back(Chain); 2126 break; 2127 } 2128 case ISD::READCYCLECOUNTER: { 2129 assert(!Subtarget.is64Bit() && 2130 "READCYCLECOUNTER only has custom type legalization on riscv32"); 2131 2132 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 2133 SDValue RCW = 2134 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 2135 2136 Results.push_back( 2137 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 2138 Results.push_back(RCW.getValue(2)); 2139 break; 2140 } 2141 case ISD::ADD: 2142 case ISD::SUB: 2143 case ISD::MUL: 2144 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2145 "Unexpected custom legalisation"); 2146 if (N->getOperand(1).getOpcode() == ISD::Constant) 2147 return; 2148 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2149 break; 2150 case ISD::SHL: 2151 case ISD::SRA: 2152 case ISD::SRL: 2153 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2154 "Unexpected custom legalisation"); 2155 if (N->getOperand(1).getOpcode() == ISD::Constant) 2156 return; 2157 Results.push_back(customLegalizeToWOp(N, DAG)); 2158 break; 2159 case ISD::ROTL: 2160 case ISD::ROTR: 2161 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2162 "Unexpected custom legalisation"); 2163 Results.push_back(customLegalizeToWOp(N, DAG)); 2164 break; 2165 case ISD::SDIV: 2166 case ISD::UDIV: 2167 case ISD::UREM: { 2168 MVT VT = N->getSimpleValueType(0); 2169 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 2170 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 2171 "Unexpected custom legalisation"); 2172 if (N->getOperand(0).getOpcode() == ISD::Constant || 2173 N->getOperand(1).getOpcode() == ISD::Constant) 2174 return; 2175 2176 // If the input is i32, use ANY_EXTEND since the W instructions don't read 2177 // the upper 32 bits. For other types we need to sign or zero extend 2178 // based on the opcode. 2179 unsigned ExtOpc = ISD::ANY_EXTEND; 2180 if (VT != MVT::i32) 2181 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 2182 : ISD::ZERO_EXTEND; 2183 2184 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 2185 break; 2186 } 2187 case ISD::BITCAST: { 2188 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2189 Subtarget.hasStdExtF()) || 2190 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 2191 "Unexpected custom legalisation"); 2192 SDValue Op0 = N->getOperand(0); 2193 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 2194 if (Op0.getValueType() != MVT::f16) 2195 return; 2196 SDValue FPConv = 2197 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 2198 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 2199 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2200 Subtarget.hasStdExtF()) { 2201 if (Op0.getValueType() != MVT::f32) 2202 return; 2203 SDValue FPConv = 2204 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 2205 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 2206 } 2207 break; 2208 } 2209 case RISCVISD::GREVI: 2210 case RISCVISD::GORCI: { 2211 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2212 "Unexpected custom legalisation"); 2213 // This is similar to customLegalizeToWOp, except that we pass the second 2214 // operand (a TargetConstant) straight through: it is already of type 2215 // XLenVT. 2216 SDLoc DL(N); 2217 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2218 SDValue NewOp0 = 2219 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2220 SDValue NewRes = 2221 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 2222 // ReplaceNodeResults requires we maintain the same type for the return 2223 // value. 2224 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 2225 break; 2226 } 2227 case ISD::BSWAP: 2228 case ISD::BITREVERSE: { 2229 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2230 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 2231 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2232 N->getOperand(0)); 2233 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 2234 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 2235 DAG.getTargetConstant(Imm, DL, 2236 Subtarget.getXLenVT())); 2237 // ReplaceNodeResults requires we maintain the same type for the return 2238 // value. 2239 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 2240 break; 2241 } 2242 case ISD::FSHL: 2243 case ISD::FSHR: { 2244 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2245 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 2246 SDValue NewOp0 = 2247 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2248 SDValue NewOp1 = 2249 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2250 SDValue NewOp2 = 2251 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 2252 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 2253 // Mask the shift amount to 5 bits. 2254 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 2255 DAG.getConstant(0x1f, DL, MVT::i64)); 2256 unsigned Opc = 2257 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 2258 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 2259 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 2260 break; 2261 } 2262 case ISD::EXTRACT_VECTOR_ELT: { 2263 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 2264 // type is illegal (currently only vXi64 RV32). 2265 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 2266 // transferred to the destination register. We issue two of these from the 2267 // upper- and lower- halves of the SEW-bit vector element, slid down to the 2268 // first element. 2269 SDLoc DL(N); 2270 SDValue Vec = N->getOperand(0); 2271 SDValue Idx = N->getOperand(1); 2272 EVT VecVT = Vec.getValueType(); 2273 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 2274 VecVT.getVectorElementType() == MVT::i64 && 2275 "Unexpected EXTRACT_VECTOR_ELT legalization"); 2276 2277 SDValue Slidedown = Vec; 2278 // Unless the index is known to be 0, we must slide the vector down to get 2279 // the desired element into index 0. 2280 if (!isNullConstant(Idx)) 2281 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 2282 DAG.getUNDEF(VecVT), Vec, Idx); 2283 2284 MVT XLenVT = Subtarget.getXLenVT(); 2285 // Extract the lower XLEN bits of the correct vector element. 2286 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 2287 2288 // To extract the upper XLEN bits of the vector element, shift the first 2289 // element right by 32 bits and re-extract the lower XLEN bits. 2290 SDValue ThirtyTwoV = 2291 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2292 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 2293 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 2294 2295 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 2296 2297 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 2298 break; 2299 } 2300 case ISD::INTRINSIC_WO_CHAIN: { 2301 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2302 switch (IntNo) { 2303 default: 2304 llvm_unreachable( 2305 "Don't know how to custom type legalize this intrinsic!"); 2306 case Intrinsic::riscv_vmv_x_s: { 2307 EVT VT = N->getValueType(0); 2308 assert((VT == MVT::i8 || VT == MVT::i16 || 2309 (Subtarget.is64Bit() && VT == MVT::i32)) && 2310 "Unexpected custom legalisation!"); 2311 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 2312 Subtarget.getXLenVT(), N->getOperand(1)); 2313 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 2314 break; 2315 } 2316 } 2317 break; 2318 } 2319 case ISD::VECREDUCE_ADD: 2320 case ISD::VECREDUCE_AND: 2321 case ISD::VECREDUCE_OR: 2322 case ISD::VECREDUCE_XOR: 2323 case ISD::VECREDUCE_SMAX: 2324 case ISD::VECREDUCE_UMAX: 2325 case ISD::VECREDUCE_SMIN: 2326 case ISD::VECREDUCE_UMIN: 2327 // The custom-lowering for these nodes returns a vector whose first element 2328 // is the result of the reduction. Extract its first element and let the 2329 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 2330 Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); 2331 break; 2332 } 2333 } 2334 2335 // A structure to hold one of the bit-manipulation patterns below. 
Together, a
2336 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
2337 // (or (and (shl x, 1), 0xAAAAAAAA),
2338 // (and (srl x, 1), 0x55555555))
2339 struct RISCVBitmanipPat {
2340 SDValue Op;
2341 unsigned ShAmt;
2342 bool IsSHL;
2343
2344 bool formsPairWith(const RISCVBitmanipPat &Other) const {
2345 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
2346 }
2347 };
2348
2349 // Matches any of the following bit-manipulation patterns:
2350 // (and (shl x, 1), (0x55555555 << 1))
2351 // (and (srl x, 1), 0x55555555)
2352 // (shl (and x, 0x55555555), 1)
2353 // (srl (and x, (0x55555555 << 1)), 1)
2354 // where the shift amount and mask may vary thus:
2355 // [1] = 0x55555555 / 0xAAAAAAAA
2356 // [2] = 0x33333333 / 0xCCCCCCCC
2357 // [4] = 0x0F0F0F0F / 0xF0F0F0F0
2358 // [8] = 0x00FF00FF / 0xFF00FF00
2359 // [16] = 0x0000FFFF / 0xFFFF0000
2360 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
2361 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
2362 Optional<uint64_t> Mask;
2363 // Optionally consume a mask around the shift operation.
2364 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
2365 Mask = Op.getConstantOperandVal(1);
2366 Op = Op.getOperand(0);
2367 }
2368 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
2369 return None;
2370 bool IsSHL = Op.getOpcode() == ISD::SHL;
2371
2372 if (!isa<ConstantSDNode>(Op.getOperand(1)))
2373 return None;
2374 auto ShAmt = Op.getConstantOperandVal(1);
2375
2376 if (!isPowerOf2_64(ShAmt))
2377 return None;
2378
2379 // These are the unshifted masks which we use to match bit-manipulation
2380 // patterns. They may be shifted left in certain circumstances.
2381 static const uint64_t BitmanipMasks[] = {
2382 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
2383 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
2384 };
2385
2386 unsigned MaskIdx = Log2_64(ShAmt);
2387 if (MaskIdx >= array_lengthof(BitmanipMasks))
2388 return None;
2389
2390 auto Src = Op.getOperand(0);
2391
2392 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
2393 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
2394
2395 // The expected mask is shifted left when the AND is found around SHL
2396 // patterns.
2397 // ((x >> 1) & 0x55555555)
2398 // ((x << 1) & 0xAAAAAAAA)
2399 bool SHLExpMask = IsSHL;
2400
2401 if (!Mask) {
2402 // Sometimes LLVM keeps the mask as an operand of the shift, typically when
2403 // the mask is all ones: consume that now.
2404 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
2405 Mask = Src.getConstantOperandVal(1);
2406 Src = Src.getOperand(0);
2407 // The expected mask is now in fact shifted left for SRL, so reverse the
2408 // decision.
2409 // ((x & 0xAAAAAAAA) >> 1)
2410 // ((x & 0x55555555) << 1)
2411 SHLExpMask = !SHLExpMask;
2412 } else {
2413 // Use a default shifted mask of all-ones if there's no AND, truncated
2414 // down to the expected width. This simplifies the logic later on.
2415 Mask = maskTrailingOnes<uint64_t>(Width);
2416 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
2417 }
2418 }
2419
2420 if (SHLExpMask)
2421 ExpMask <<= ShAmt;
2422
2423 if (Mask != ExpMask)
2424 return None;
2425
2426 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
2427 }
2428
2429 // Match the following pattern as a GREVI(W) operation
2430 // (or (BITMANIP_SHL x), (BITMANIP_SRL x))
2431 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
2432 const RISCVSubtarget &Subtarget) {
2433 EVT VT = Op.getValueType();
2434
2435 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2436 auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
2437 auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
2438 if (LHS && RHS && LHS->formsPairWith(*RHS)) {
2439 SDLoc DL(Op);
2440 return DAG.getNode(
2441 RISCVISD::GREVI, DL, VT, LHS->Op,
2442 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2443 }
2444 }
2445 return SDValue();
2446 }
2447
2448 // Matches any of the following patterns as a GORCI(W) operation
2449 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
2450 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
2451 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
2452 // Note that with the variant of 3.,
2453 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
2454 // the inner pattern will first be matched as GREVI and then the outer
2455 // pattern will be matched to GORC via the first rule above.
2456 // 4. (or (rotl/rotr x, bitwidth/2), x)
2457 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
2458 const RISCVSubtarget &Subtarget) {
2459 EVT VT = Op.getValueType();
2460
2461 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2462 SDLoc DL(Op);
2463 SDValue Op0 = Op.getOperand(0);
2464 SDValue Op1 = Op.getOperand(1);
2465
2466 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2467 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2468 isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2469 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2470 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
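// e.g. on RV32, (or (rotl x, 16), x) is equivalent to (gorci x, 16).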
2471 if ((Reverse.getOpcode() == ISD::ROTL || 2472 Reverse.getOpcode() == ISD::ROTR) && 2473 Reverse.getOperand(0) == X && 2474 isa<ConstantSDNode>(Reverse.getOperand(1))) { 2475 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 2476 if (RotAmt == (VT.getSizeInBits() / 2)) 2477 return DAG.getNode( 2478 RISCVISD::GORCI, DL, VT, X, 2479 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2480 } 2481 return SDValue(); 2482 }; 2483 2484 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2485 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2486 return V; 2487 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2488 return V; 2489 2490 // OR is commutable so canonicalize its OR operand to the left 2491 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2492 std::swap(Op0, Op1); 2493 if (Op0.getOpcode() != ISD::OR) 2494 return SDValue(); 2495 SDValue OrOp0 = Op0.getOperand(0); 2496 SDValue OrOp1 = Op0.getOperand(1); 2497 auto LHS = matchRISCVBitmanipPat(OrOp0); 2498 // OR is commutable so swap the operands and try again: x might have been 2499 // on the left 2500 if (!LHS) { 2501 std::swap(OrOp0, OrOp1); 2502 LHS = matchRISCVBitmanipPat(OrOp0); 2503 } 2504 auto RHS = matchRISCVBitmanipPat(Op1); 2505 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2506 return DAG.getNode( 2507 RISCVISD::GORCI, DL, VT, LHS->Op, 2508 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2509 } 2510 } 2511 return SDValue(); 2512 } 2513 2514 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2515 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2516 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2517 // not undo itself, but they are redundant. 2518 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2519 unsigned ShAmt1 = N->getConstantOperandVal(1); 2520 SDValue Src = N->getOperand(0); 2521 2522 if (Src.getOpcode() != N->getOpcode()) 2523 return SDValue(); 2524 2525 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2526 Src = Src.getOperand(0); 2527 2528 unsigned CombinedShAmt; 2529 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2530 CombinedShAmt = ShAmt1 | ShAmt2; 2531 else 2532 CombinedShAmt = ShAmt1 ^ ShAmt2; 2533 2534 if (CombinedShAmt == 0) 2535 return Src; 2536 2537 SDLoc DL(N); 2538 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2539 DAG.getTargetConstant(CombinedShAmt, DL, 2540 N->getOperand(1).getValueType())); 2541 } 2542 2543 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2544 DAGCombinerInfo &DCI) const { 2545 SelectionDAG &DAG = DCI.DAG; 2546 2547 switch (N->getOpcode()) { 2548 default: 2549 break; 2550 case RISCVISD::SplitF64: { 2551 SDValue Op0 = N->getOperand(0); 2552 // If the input to SplitF64 is just BuildPairF64 then the operation is 2553 // redundant. Instead, use BuildPairF64's operands directly. 2554 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2555 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2556 2557 SDLoc DL(N); 2558 2559 // It's cheaper to materialise two 32-bit integers than to load a double 2560 // from the constant pool and transfer it to integer registers through the 2561 // stack. 
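// For example, splitting the f64 constant 1.0 (0x3FF0000000000000) yields the
// i32 pair Lo = 0x00000000 and Hi = 0x3FF00000.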
2562 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
2563 APInt V = C->getValueAPF().bitcastToAPInt();
2564 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
2565 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
2566 return DCI.CombineTo(N, Lo, Hi);
2567 }
2568
2569 // This is a target-specific version of a DAGCombine performed in
2570 // DAGCombiner::visitBITCAST. It performs the equivalent of:
2571 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2572 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2573 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2574 !Op0.getNode()->hasOneUse())
2575 break;
2576 SDValue NewSplitF64 =
2577 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
2578 Op0.getOperand(0));
2579 SDValue Lo = NewSplitF64.getValue(0);
2580 SDValue Hi = NewSplitF64.getValue(1);
2581 APInt SignBit = APInt::getSignMask(32);
2582 if (Op0.getOpcode() == ISD::FNEG) {
2583 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
2584 DAG.getConstant(SignBit, DL, MVT::i32));
2585 return DCI.CombineTo(N, Lo, NewHi);
2586 }
2587 assert(Op0.getOpcode() == ISD::FABS);
2588 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
2589 DAG.getConstant(~SignBit, DL, MVT::i32));
2590 return DCI.CombineTo(N, Lo, NewHi);
2591 }
2592 case RISCVISD::SLLW:
2593 case RISCVISD::SRAW:
2594 case RISCVISD::SRLW:
2595 case RISCVISD::ROLW:
2596 case RISCVISD::RORW: {
2597 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
2598 SDValue LHS = N->getOperand(0);
2599 SDValue RHS = N->getOperand(1);
2600 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
2601 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
2602 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
2603 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
2604 if (N->getOpcode() != ISD::DELETED_NODE)
2605 DCI.AddToWorklist(N);
2606 return SDValue(N, 0);
2607 }
2608 break;
2609 }
2610 case RISCVISD::FSL:
2611 case RISCVISD::FSR: {
2612 // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
2613 SDValue ShAmt = N->getOperand(2);
2614 unsigned BitWidth = ShAmt.getValueSizeInBits();
2615 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
2616 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
2617 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2618 if (N->getOpcode() != ISD::DELETED_NODE)
2619 DCI.AddToWorklist(N);
2620 return SDValue(N, 0);
2621 }
2622 break;
2623 }
2624 case RISCVISD::FSLW:
2625 case RISCVISD::FSRW: {
2626 // Only the lower 32 bits of the values and lower 6 bits of the shift amount
2627 // are read.
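// As with FSL/FSR above, the *W forms operate on 32-bit values, so
// log2(32)+1 = 6 bits of the shift amount are significant.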
2628 SDValue Op0 = N->getOperand(0); 2629 SDValue Op1 = N->getOperand(1); 2630 SDValue ShAmt = N->getOperand(2); 2631 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2632 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2633 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2634 SimplifyDemandedBits(Op1, OpMask, DCI) || 2635 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2636 if (N->getOpcode() != ISD::DELETED_NODE) 2637 DCI.AddToWorklist(N); 2638 return SDValue(N, 0); 2639 } 2640 break; 2641 } 2642 case RISCVISD::GREVIW: 2643 case RISCVISD::GORCIW: { 2644 // Only the lower 32 bits of the first operand are read 2645 SDValue Op0 = N->getOperand(0); 2646 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2647 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2648 if (N->getOpcode() != ISD::DELETED_NODE) 2649 DCI.AddToWorklist(N); 2650 return SDValue(N, 0); 2651 } 2652 2653 return combineGREVI_GORCI(N, DCI.DAG); 2654 } 2655 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2656 SDLoc DL(N); 2657 SDValue Op0 = N->getOperand(0); 2658 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2659 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2660 // of the FMV_W_X_RV64 operand. 2661 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2662 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2663 "Unexpected value type!"); 2664 return Op0.getOperand(0); 2665 } 2666 2667 // This is a target-specific version of a DAGCombine performed in 2668 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2669 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2670 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2671 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2672 !Op0.getNode()->hasOneUse()) 2673 break; 2674 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2675 Op0.getOperand(0)); 2676 APInt SignBit = APInt::getSignMask(32).sext(64); 2677 if (Op0.getOpcode() == ISD::FNEG) 2678 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2679 DAG.getConstant(SignBit, DL, MVT::i64)); 2680 2681 assert(Op0.getOpcode() == ISD::FABS); 2682 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2683 DAG.getConstant(~SignBit, DL, MVT::i64)); 2684 } 2685 case RISCVISD::GREVI: 2686 case RISCVISD::GORCI: 2687 return combineGREVI_GORCI(N, DCI.DAG); 2688 case ISD::OR: 2689 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2690 return GREV; 2691 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2692 return GORC; 2693 break; 2694 case RISCVISD::SELECT_CC: { 2695 // Transform 2696 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2697 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2698 // This can occur when legalizing some floating point comparisons. 
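// For example, (select_cc (xor (setcc a, b, setolt), 1), 0, setne, t, f)
// becomes (select_cc (setcc a, b, setolt), 0, seteq, t, f).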
2699 SDValue LHS = N->getOperand(0); 2700 SDValue RHS = N->getOperand(1); 2701 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2702 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2703 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2704 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2705 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2706 SDLoc DL(N); 2707 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2708 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2709 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2710 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2711 N->getOperand(4)}); 2712 } 2713 break; 2714 } 2715 case ISD::SETCC: { 2716 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2717 // Comparing with 0 may allow us to fold into bnez/beqz. 2718 SDValue LHS = N->getOperand(0); 2719 SDValue RHS = N->getOperand(1); 2720 if (LHS.getValueType().isScalableVector()) 2721 break; 2722 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2723 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2724 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2725 DAG.MaskedValueIsZero(LHS, Mask)) { 2726 SDLoc DL(N); 2727 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2728 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2729 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2730 } 2731 break; 2732 } 2733 } 2734 2735 return SDValue(); 2736 } 2737 2738 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2739 const SDNode *N, CombineLevel Level) const { 2740 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2741 // materialised in fewer instructions than `(OP _, c1)`: 2742 // 2743 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2744 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2745 SDValue N0 = N->getOperand(0); 2746 EVT Ty = N0.getValueType(); 2747 if (Ty.isScalarInteger() && 2748 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2749 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2750 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2751 if (C1 && C2) { 2752 const APInt &C1Int = C1->getAPIntValue(); 2753 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2754 2755 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2756 // and the combine should happen, to potentially allow further combines 2757 // later. 2758 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2759 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2760 return true; 2761 2762 // We can materialise `c1` in an add immediate, so it's "free", and the 2763 // combine should be prevented. 2764 if (C1Int.getMinSignedBits() <= 64 && 2765 isLegalAddImmediate(C1Int.getSExtValue())) 2766 return false; 2767 2768 // Neither constant will fit into an immediate, so find materialisation 2769 // costs. 2770 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2771 Subtarget.is64Bit()); 2772 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2773 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2774 2775 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2776 // combine should be prevented. 
2777 if (C1Cost < ShiftedC1Cost) 2778 return false; 2779 } 2780 } 2781 return true; 2782 } 2783 2784 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2785 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2786 TargetLoweringOpt &TLO) const { 2787 // Delay this optimization as late as possible. 2788 if (!TLO.LegalOps) 2789 return false; 2790 2791 EVT VT = Op.getValueType(); 2792 if (VT.isVector()) 2793 return false; 2794 2795 // Only handle AND for now. 2796 if (Op.getOpcode() != ISD::AND) 2797 return false; 2798 2799 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2800 if (!C) 2801 return false; 2802 2803 const APInt &Mask = C->getAPIntValue(); 2804 2805 // Clear all non-demanded bits initially. 2806 APInt ShrunkMask = Mask & DemandedBits; 2807 2808 // If the shrunk mask fits in sign extended 12 bits, let the target 2809 // independent code apply it. 2810 if (ShrunkMask.isSignedIntN(12)) 2811 return false; 2812 2813 // Try to make a smaller immediate by setting undemanded bits. 2814 2815 // We need to be able to make a negative number through a combination of mask 2816 // and undemanded bits. 2817 APInt ExpandedMask = Mask | ~DemandedBits; 2818 if (!ExpandedMask.isNegative()) 2819 return false; 2820 2821 // What is the fewest number of bits we need to represent the negative number. 2822 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2823 2824 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2825 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2826 APInt NewMask = ShrunkMask; 2827 if (MinSignedBits <= 12) 2828 NewMask.setBitsFrom(11); 2829 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2830 NewMask.setBitsFrom(31); 2831 else 2832 return false; 2833 2834 // Sanity check that our new mask is a subset of the demanded mask. 2835 assert(NewMask.isSubsetOf(ExpandedMask)); 2836 2837 // If we aren't changing the mask, just return true to keep it and prevent 2838 // the caller from optimizing. 2839 if (NewMask == Mask) 2840 return true; 2841 2842 // Replace the constant with the new mask. 2843 SDLoc DL(Op); 2844 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2845 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2846 return TLO.CombineTo(Op, NewOp); 2847 } 2848 2849 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2850 KnownBits &Known, 2851 const APInt &DemandedElts, 2852 const SelectionDAG &DAG, 2853 unsigned Depth) const { 2854 unsigned BitWidth = Known.getBitWidth(); 2855 unsigned Opc = Op.getOpcode(); 2856 assert((Opc >= ISD::BUILTIN_OP_END || 2857 Opc == ISD::INTRINSIC_WO_CHAIN || 2858 Opc == ISD::INTRINSIC_W_CHAIN || 2859 Opc == ISD::INTRINSIC_VOID) && 2860 "Should use MaskedValueIsZero if you don't know whether Op" 2861 " is a target node!"); 2862 2863 Known.resetAll(); 2864 switch (Opc) { 2865 default: break; 2866 case RISCVISD::REMUW: { 2867 KnownBits Known2; 2868 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2869 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2870 // We only care about the lower 32 bits. 2871 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2872 // Restore the original width by sign extending. 
2873 Known = Known.sext(BitWidth); 2874 break; 2875 } 2876 case RISCVISD::DIVUW: { 2877 KnownBits Known2; 2878 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2879 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2880 // We only care about the lower 32 bits. 2881 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2882 // Restore the original width by sign extending. 2883 Known = Known.sext(BitWidth); 2884 break; 2885 } 2886 case RISCVISD::READ_VLENB: 2887 // We assume VLENB is at least 8 bytes. 2888 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2889 Known.Zero.setLowBits(3); 2890 break; 2891 } 2892 } 2893 2894 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2895 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2896 unsigned Depth) const { 2897 switch (Op.getOpcode()) { 2898 default: 2899 break; 2900 case RISCVISD::SLLW: 2901 case RISCVISD::SRAW: 2902 case RISCVISD::SRLW: 2903 case RISCVISD::DIVW: 2904 case RISCVISD::DIVUW: 2905 case RISCVISD::REMUW: 2906 case RISCVISD::ROLW: 2907 case RISCVISD::RORW: 2908 case RISCVISD::GREVIW: 2909 case RISCVISD::GORCIW: 2910 case RISCVISD::FSLW: 2911 case RISCVISD::FSRW: 2912 // TODO: As the result is sign-extended, this is conservatively correct. A 2913 // more precise answer could be calculated for SRAW depending on known 2914 // bits in the shift amount. 2915 return 33; 2916 case RISCVISD::VMV_X_S: 2917 // The number of sign bits of the scalar result is computed by obtaining the 2918 // element type of the input vector operand, subtracting its width from the 2919 // XLEN, and then adding one (sign bit within the element type). If the 2920 // element type is wider than XLen, the least-significant XLEN bits are 2921 // taken. 2922 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 2923 return 1; 2924 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 2925 } 2926 2927 return 1; 2928 } 2929 2930 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 2931 MachineBasicBlock *BB) { 2932 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 2933 2934 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 2935 // Should the count have wrapped while it was being read, we need to try 2936 // again. 2937 // ... 2938 // read: 2939 // rdcycleh x3 # load high word of cycle 2940 // rdcycle x2 # load low word of cycle 2941 // rdcycleh x4 # load high word of cycle 2942 // bne x3, x4, read # check if high word reads match, otherwise try again 2943 // ... 2944 2945 MachineFunction &MF = *BB->getParent(); 2946 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2947 MachineFunction::iterator It = ++BB->getIterator(); 2948 2949 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2950 MF.insert(It, LoopMBB); 2951 2952 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2953 MF.insert(It, DoneMBB); 2954 2955 // Transfer the remainder of BB and its successor edges to DoneMBB. 
2956 DoneMBB->splice(DoneMBB->begin(), BB, 2957 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 2958 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 2959 2960 BB->addSuccessor(LoopMBB); 2961 2962 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2963 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 2964 Register LoReg = MI.getOperand(0).getReg(); 2965 Register HiReg = MI.getOperand(1).getReg(); 2966 DebugLoc DL = MI.getDebugLoc(); 2967 2968 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 2969 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 2970 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2971 .addReg(RISCV::X0); 2972 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 2973 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 2974 .addReg(RISCV::X0); 2975 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 2976 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2977 .addReg(RISCV::X0); 2978 2979 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 2980 .addReg(HiReg) 2981 .addReg(ReadAgainReg) 2982 .addMBB(LoopMBB); 2983 2984 LoopMBB->addSuccessor(LoopMBB); 2985 LoopMBB->addSuccessor(DoneMBB); 2986 2987 MI.eraseFromParent(); 2988 2989 return DoneMBB; 2990 } 2991 2992 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 2993 MachineBasicBlock *BB) { 2994 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 2995 2996 MachineFunction &MF = *BB->getParent(); 2997 DebugLoc DL = MI.getDebugLoc(); 2998 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2999 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3000 Register LoReg = MI.getOperand(0).getReg(); 3001 Register HiReg = MI.getOperand(1).getReg(); 3002 Register SrcReg = MI.getOperand(2).getReg(); 3003 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 3004 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3005 3006 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 3007 RI); 3008 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3009 MachineMemOperand *MMOLo = 3010 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 3011 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3012 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 3013 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 3014 .addFrameIndex(FI) 3015 .addImm(0) 3016 .addMemOperand(MMOLo); 3017 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 3018 .addFrameIndex(FI) 3019 .addImm(4) 3020 .addMemOperand(MMOHi); 3021 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3022 return BB; 3023 } 3024 3025 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 3026 MachineBasicBlock *BB) { 3027 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 3028 "Unexpected instruction"); 3029 3030 MachineFunction &MF = *BB->getParent(); 3031 DebugLoc DL = MI.getDebugLoc(); 3032 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3033 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3034 Register DstReg = MI.getOperand(0).getReg(); 3035 Register LoReg = MI.getOperand(1).getReg(); 3036 Register HiReg = MI.getOperand(2).getReg(); 3037 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 3038 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3039 3040 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3041 MachineMemOperand *MMOLo = 3042 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 3043 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3044 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 3045 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3046 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 3047 .addFrameIndex(FI) 3048 .addImm(0) 3049 .addMemOperand(MMOLo); 3050 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3051 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 3052 .addFrameIndex(FI) 3053 .addImm(4) 3054 .addMemOperand(MMOHi); 3055 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 3056 MI.eraseFromParent(); // The pseudo instruction is gone now. 3057 return BB; 3058 } 3059 3060 static bool isSelectPseudo(MachineInstr &MI) { 3061 switch (MI.getOpcode()) { 3062 default: 3063 return false; 3064 case RISCV::Select_GPR_Using_CC_GPR: 3065 case RISCV::Select_FPR16_Using_CC_GPR: 3066 case RISCV::Select_FPR32_Using_CC_GPR: 3067 case RISCV::Select_FPR64_Using_CC_GPR: 3068 return true; 3069 } 3070 } 3071 3072 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 3073 MachineBasicBlock *BB) { 3074 // To "insert" Select_* instructions, we actually have to insert the triangle 3075 // control-flow pattern. The incoming instructions know the destination vreg 3076 // to set, the condition code register to branch on, the true/false values to 3077 // select between, and the condcode to use to select the appropriate branch. 3078 // 3079 // We produce the following control flow: 3080 // HeadMBB 3081 // | \ 3082 // | IfFalseMBB 3083 // | / 3084 // TailMBB 3085 // 3086 // When we find a sequence of selects we attempt to optimize their emission 3087 // by sharing the control flow. Currently we only handle cases where we have 3088 // multiple selects with the exact same condition (same LHS, RHS and CC). 3089 // The selects may be interleaved with other instructions if the other 3090 // instructions meet some requirements we deem safe: 3091 // - They are debug instructions. Otherwise, 3092 // - They do not have side-effects, do not access memory and their inputs do 3093 // not depend on the results of the select pseudo-instructions. 3094 // The TrueV/FalseV operands of the selects cannot depend on the result of 3095 // previous selects in the sequence. 3096 // These conditions could be further relaxed. See the X86 target for a 3097 // related approach and more information. 
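  // Illustrative sketch only (not actual emitted MIR; the value names are
  // made up): a single Select_GPR_Using_CC_GPR with CC == SETLT lowers to
  // roughly
  //   HeadMBB:
  //     BLT %lhs, %rhs, %TailMBB
  //   IfFalseMBB:                 ; falls through
  //   TailMBB:
  //     %res = PHI [ %trueval, %HeadMBB ], [ %falseval, %IfFalseMBB ]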
3098 Register LHS = MI.getOperand(1).getReg(); 3099 Register RHS = MI.getOperand(2).getReg(); 3100 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 3101 3102 SmallVector<MachineInstr *, 4> SelectDebugValues; 3103 SmallSet<Register, 4> SelectDests; 3104 SelectDests.insert(MI.getOperand(0).getReg()); 3105 3106 MachineInstr *LastSelectPseudo = &MI; 3107 3108 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 3109 SequenceMBBI != E; ++SequenceMBBI) { 3110 if (SequenceMBBI->isDebugInstr()) 3111 continue; 3112 else if (isSelectPseudo(*SequenceMBBI)) { 3113 if (SequenceMBBI->getOperand(1).getReg() != LHS || 3114 SequenceMBBI->getOperand(2).getReg() != RHS || 3115 SequenceMBBI->getOperand(3).getImm() != CC || 3116 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 3117 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 3118 break; 3119 LastSelectPseudo = &*SequenceMBBI; 3120 SequenceMBBI->collectDebugValues(SelectDebugValues); 3121 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 3122 } else { 3123 if (SequenceMBBI->hasUnmodeledSideEffects() || 3124 SequenceMBBI->mayLoadOrStore()) 3125 break; 3126 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 3127 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 3128 })) 3129 break; 3130 } 3131 } 3132 3133 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 3134 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3135 DebugLoc DL = MI.getDebugLoc(); 3136 MachineFunction::iterator I = ++BB->getIterator(); 3137 3138 MachineBasicBlock *HeadMBB = BB; 3139 MachineFunction *F = BB->getParent(); 3140 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 3141 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 3142 3143 F->insert(I, IfFalseMBB); 3144 F->insert(I, TailMBB); 3145 3146 // Transfer debug instructions associated with the selects to TailMBB. 3147 for (MachineInstr *DebugInstr : SelectDebugValues) { 3148 TailMBB->push_back(DebugInstr->removeFromParent()); 3149 } 3150 3151 // Move all instructions after the sequence to TailMBB. 3152 TailMBB->splice(TailMBB->end(), HeadMBB, 3153 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 3154 // Update machine-CFG edges by transferring all successors of the current 3155 // block to the new block which will contain the Phi nodes for the selects. 3156 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 3157 // Set the successors for HeadMBB. 3158 HeadMBB->addSuccessor(IfFalseMBB); 3159 HeadMBB->addSuccessor(TailMBB); 3160 3161 // Insert appropriate branch. 3162 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 3163 3164 BuildMI(HeadMBB, DL, TII.get(Opcode)) 3165 .addReg(LHS) 3166 .addReg(RHS) 3167 .addMBB(TailMBB); 3168 3169 // IfFalseMBB just falls through to TailMBB. 3170 IfFalseMBB->addSuccessor(TailMBB); 3171 3172 // Create PHIs for all of the select pseudo-instructions. 
3173 auto SelectMBBI = MI.getIterator(); 3174 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 3175 auto InsertionPoint = TailMBB->begin(); 3176 while (SelectMBBI != SelectEnd) { 3177 auto Next = std::next(SelectMBBI); 3178 if (isSelectPseudo(*SelectMBBI)) { 3179 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 3180 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 3181 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 3182 .addReg(SelectMBBI->getOperand(4).getReg()) 3183 .addMBB(HeadMBB) 3184 .addReg(SelectMBBI->getOperand(5).getReg()) 3185 .addMBB(IfFalseMBB); 3186 SelectMBBI->eraseFromParent(); 3187 } 3188 SelectMBBI = Next; 3189 } 3190 3191 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 3192 return TailMBB; 3193 } 3194 3195 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 3196 int VLIndex, unsigned SEWIndex, 3197 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 3198 MachineFunction &MF = *BB->getParent(); 3199 DebugLoc DL = MI.getDebugLoc(); 3200 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3201 3202 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 3203 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3204 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 3205 3206 MachineRegisterInfo &MRI = MF.getRegInfo(); 3207 3208 // VL and VTYPE are alive here. 3209 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 3210 3211 if (VLIndex >= 0) { 3212 // Set VL (rs1 != X0). 3213 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3214 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 3215 .addReg(MI.getOperand(VLIndex).getReg()); 3216 } else 3217 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 3218 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 3219 .addReg(RISCV::X0, RegState::Kill); 3220 3221 // Default to tail agnostic unless the destination is tied to a source. In 3222 // that case the user would have some control over the tail values. The tail 3223 // policy is also ignored on instructions that only update element 0 like 3224 // vmv.s.x or reductions so use agnostic there to match the common case. 3225 // FIXME: This is conservatively correct, but we might want to detect that 3226 // the input is undefined. 3227 bool TailAgnostic = true; 3228 unsigned UseOpIdx; 3229 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 3230 TailAgnostic = false; 3231 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 3232 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 3233 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 3234 if (UseMI && UseMI->isImplicitDef()) 3235 TailAgnostic = true; 3236 } 3237 3238 // For simplicity we reuse the vtype representation here. 
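  // Illustrative example (assuming the vtype layout used by
  // RISCVVType::encodeVTYPE: vlmul in bits 2:0, vsew in bits 5:3, vta in
  // bit 6, vma in bit 7): SEW=32 (vsew=0b010) with LMUL=1 (vlmul=0b000),
  // tail agnostic and mask undisturbed would encode as 0x50, i.e.
  // "e32, m1, ta, mu".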
3239   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
3240                                      /*TailAgnostic*/ TailAgnostic,
3241                                      /*MaskAgnostic*/ false));
3242 
3243   // Remove (now) redundant operands from the pseudo.
3244   MI.getOperand(SEWIndex).setImm(-1);
3245   if (VLIndex >= 0) {
3246     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
3247     MI.getOperand(VLIndex).setIsKill(false);
3248   }
3249 
3250   return BB;
3251 }
3252 
3253 MachineBasicBlock *
3254 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3255                                                  MachineBasicBlock *BB) const {
3256   uint64_t TSFlags = MI.getDesc().TSFlags;
3257 
3258   if (TSFlags & RISCVII::HasSEWOpMask) {
3259     unsigned NumOperands = MI.getNumExplicitOperands();
3260     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
3261     unsigned SEWIndex = NumOperands - 1;
3262     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
3263 
3264     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
3265                                                RISCVII::VLMulShift);
3266     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
3267   }
3268 
3269   switch (MI.getOpcode()) {
3270   default:
3271     llvm_unreachable("Unexpected instr type to insert");
3272   case RISCV::ReadCycleWide:
3273     assert(!Subtarget.is64Bit() &&
3274            "ReadCycleWide is only to be used on riscv32");
3275     return emitReadCycleWidePseudo(MI, BB);
3276   case RISCV::Select_GPR_Using_CC_GPR:
3277   case RISCV::Select_FPR16_Using_CC_GPR:
3278   case RISCV::Select_FPR32_Using_CC_GPR:
3279   case RISCV::Select_FPR64_Using_CC_GPR:
3280     return emitSelectPseudo(MI, BB);
3281   case RISCV::BuildPairF64Pseudo:
3282     return emitBuildPairF64Pseudo(MI, BB);
3283   case RISCV::SplitF64Pseudo:
3284     return emitSplitF64Pseudo(MI, BB);
3285   }
3286 }
3287 
3288 // Calling Convention Implementation.
3289 // The expectations for frontend ABI lowering vary from target to target.
3290 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
3291 // details, but this is a longer term goal. For now, we simply try to keep the
3292 // role of the frontend as simple and well-defined as possible. The rules can
3293 // be summarised as:
3294 // * Never split up large scalar arguments. We handle them here.
3295 // * If a hard-float calling convention is being used, and the struct may be
3296 // passed in a pair of registers (fp+fp, int+fp), and both registers are
3297 // available, then pass as two separate arguments. If either the GPRs or FPRs
3298 // are exhausted, then pass according to the rule below.
3299 // * If a struct could never be passed in registers or directly in a stack
3300 // slot (as it is larger than 2*XLEN and the floating point rules don't
3301 // apply), then pass it using a pointer with the byval attribute.
3302 // * If a struct is 2*XLEN or smaller, then coerce to either a two-element
3303 // word-sized array or a 2*XLEN scalar (depending on alignment).
3304 // * The frontend can determine whether a struct is returned by reference or
3305 // not based on its size and fields. If it will be returned by reference, the
3306 // frontend must modify the prototype so a pointer with the sret annotation is
3307 // passed as the first argument. This is not necessary for large scalar
3308 // returns.
3309 // * Struct return values and varargs should be coerced to structs containing
3310 // register-size fields in the same situations they would be for fixed
3311 // arguments.
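// For illustration only (hypothetical C declaration, assuming the lp64d ABI
// and typical clang lowering): given
//   struct S { double d; int32_t i; };
//   void f(struct S s);
// the frontend passes 's' as two separate arguments (one in an FPR, one in a
// GPR) while both register classes have free registers; once FPRs or GPRs are
// exhausted it falls back to the integer rules and, because sizeof(S) is no
// larger than 2*XLEN on RV64, coerces 's' to a 2*XLEN-sized integer
// representation rather than passing it byval.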
3312 3313 static const MCPhysReg ArgGPRs[] = { 3314 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 3315 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 3316 }; 3317 static const MCPhysReg ArgFPR16s[] = { 3318 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 3319 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 3320 }; 3321 static const MCPhysReg ArgFPR32s[] = { 3322 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 3323 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 3324 }; 3325 static const MCPhysReg ArgFPR64s[] = { 3326 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 3327 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 3328 }; 3329 // This is an interim calling convention and it may be changed in the future. 3330 static const MCPhysReg ArgVRs[] = { 3331 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 3332 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 3333 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 3334 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 3335 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 3336 RISCV::V20M2, RISCV::V22M2}; 3337 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 3338 RISCV::V20M4}; 3339 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 3340 3341 // Pass a 2*XLEN argument that has been split into two XLEN values through 3342 // registers or the stack as necessary. 3343 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 3344 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 3345 MVT ValVT2, MVT LocVT2, 3346 ISD::ArgFlagsTy ArgFlags2) { 3347 unsigned XLenInBytes = XLen / 8; 3348 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3349 // At least one half can be passed via register. 3350 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 3351 VA1.getLocVT(), CCValAssign::Full)); 3352 } else { 3353 // Both halves must be passed on the stack, with proper alignment. 3354 Align StackAlign = 3355 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 3356 State.addLoc( 3357 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 3358 State.AllocateStack(XLenInBytes, StackAlign), 3359 VA1.getLocVT(), CCValAssign::Full)); 3360 State.addLoc(CCValAssign::getMem( 3361 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3362 LocVT2, CCValAssign::Full)); 3363 return false; 3364 } 3365 3366 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3367 // The second half can also be passed via register. 3368 State.addLoc( 3369 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 3370 } else { 3371 // The second half is passed via the stack, without additional alignment. 3372 State.addLoc(CCValAssign::getMem( 3373 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3374 LocVT2, CCValAssign::Full)); 3375 } 3376 3377 return false; 3378 } 3379 3380 // Implements the RISC-V calling convention. Returns true upon failure. 3381 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 3382 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 3383 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 3384 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 3385 Optional<unsigned> FirstMaskArgument) { 3386 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 3387 assert(XLen == 32 || XLen == 64); 3388 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 3389 3390 // Any return value split in to more than two values can't be returned 3391 // directly. 3392 if (IsRet && ValNo > 1) 3393 return true; 3394 3395 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 3396 // variadic argument, or if no F16/F32 argument registers are available. 3397 bool UseGPRForF16_F32 = true; 3398 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 3399 // variadic argument, or if no F64 argument registers are available. 3400 bool UseGPRForF64 = true; 3401 3402 switch (ABI) { 3403 default: 3404 llvm_unreachable("Unexpected ABI"); 3405 case RISCVABI::ABI_ILP32: 3406 case RISCVABI::ABI_LP64: 3407 break; 3408 case RISCVABI::ABI_ILP32F: 3409 case RISCVABI::ABI_LP64F: 3410 UseGPRForF16_F32 = !IsFixed; 3411 break; 3412 case RISCVABI::ABI_ILP32D: 3413 case RISCVABI::ABI_LP64D: 3414 UseGPRForF16_F32 = !IsFixed; 3415 UseGPRForF64 = !IsFixed; 3416 break; 3417 } 3418 3419 // FPR16, FPR32, and FPR64 alias each other. 3420 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 3421 UseGPRForF16_F32 = true; 3422 UseGPRForF64 = true; 3423 } 3424 3425 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 3426 // similar local variables rather than directly checking against the target 3427 // ABI. 3428 3429 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 3430 LocVT = XLenVT; 3431 LocInfo = CCValAssign::BCvt; 3432 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 3433 LocVT = MVT::i64; 3434 LocInfo = CCValAssign::BCvt; 3435 } 3436 3437 // If this is a variadic argument, the RISC-V calling convention requires 3438 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 3439 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 3440 // be used regardless of whether the original argument was split during 3441 // legalisation or not. The argument will not be passed by registers if the 3442 // original type is larger than 2*XLEN, so the register alignment rule does 3443 // not apply. 3444 unsigned TwoXLenInBytes = (2 * XLen) / 8; 3445 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 3446 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 3447 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 3448 // Skip 'odd' register if necessary. 3449 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 3450 State.AllocateReg(ArgGPRs); 3451 } 3452 3453 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 3454 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 3455 State.getPendingArgFlags(); 3456 3457 assert(PendingLocs.size() == PendingArgFlags.size() && 3458 "PendingLocs and PendingArgFlags out of sync"); 3459 3460 // Handle passing f64 on RV32D with a soft float ABI or when floating point 3461 // registers are exhausted. 3462 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 3463 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 3464 "Can't lower f64 if it is split"); 3465 // Depending on available argument GPRS, f64 may be passed in a pair of 3466 // GPRs, split between a GPR and the stack, or passed completely on the 3467 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 3468 // cases. 
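    // Illustrative example (hypothetical signature, ilp32 ABI): for
    //   void f(int a, int b, int c, int d, int e, int g, int h, double x);
    // the seven ints consume a0-a6, so 'x' is split with its low half in a7
    // and its high half in the first stack slot; if all eight argument GPRs
    // were already taken, 'x' would be passed entirely on the stack.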
3469 Register Reg = State.AllocateReg(ArgGPRs); 3470 LocVT = MVT::i32; 3471 if (!Reg) { 3472 unsigned StackOffset = State.AllocateStack(8, Align(8)); 3473 State.addLoc( 3474 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3475 return false; 3476 } 3477 if (!State.AllocateReg(ArgGPRs)) 3478 State.AllocateStack(4, Align(4)); 3479 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3480 return false; 3481 } 3482 3483 // Split arguments might be passed indirectly, so keep track of the pending 3484 // values. 3485 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 3486 LocVT = XLenVT; 3487 LocInfo = CCValAssign::Indirect; 3488 PendingLocs.push_back( 3489 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3490 PendingArgFlags.push_back(ArgFlags); 3491 if (!ArgFlags.isSplitEnd()) { 3492 return false; 3493 } 3494 } 3495 3496 // If the split argument only had two elements, it should be passed directly 3497 // in registers or on the stack. 3498 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3499 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3500 // Apply the normal calling convention rules to the first half of the 3501 // split argument. 3502 CCValAssign VA = PendingLocs[0]; 3503 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3504 PendingLocs.clear(); 3505 PendingArgFlags.clear(); 3506 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3507 ArgFlags); 3508 } 3509 3510 // Allocate to a register if possible, or else a stack slot. 3511 Register Reg; 3512 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3513 Reg = State.AllocateReg(ArgFPR16s); 3514 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3515 Reg = State.AllocateReg(ArgFPR32s); 3516 else if (ValVT == MVT::f64 && !UseGPRForF64) 3517 Reg = State.AllocateReg(ArgFPR64s); 3518 else if (ValVT.isScalableVector()) { 3519 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3520 if (RC == &RISCV::VRRegClass) { 3521 // Assign the first mask argument to V0. 3522 // This is an interim calling convention and it may be changed in the 3523 // future. 3524 if (FirstMaskArgument.hasValue() && 3525 ValNo == FirstMaskArgument.getValue()) { 3526 Reg = State.AllocateReg(RISCV::V0); 3527 } else { 3528 Reg = State.AllocateReg(ArgVRs); 3529 } 3530 } else if (RC == &RISCV::VRM2RegClass) { 3531 Reg = State.AllocateReg(ArgVRM2s); 3532 } else if (RC == &RISCV::VRM4RegClass) { 3533 Reg = State.AllocateReg(ArgVRM4s); 3534 } else if (RC == &RISCV::VRM8RegClass) { 3535 Reg = State.AllocateReg(ArgVRM8s); 3536 } else { 3537 llvm_unreachable("Unhandled class register for ValueType"); 3538 } 3539 if (!Reg) { 3540 LocInfo = CCValAssign::Indirect; 3541 // Try using a GPR to pass the address 3542 Reg = State.AllocateReg(ArgGPRs); 3543 LocVT = XLenVT; 3544 } 3545 } else 3546 Reg = State.AllocateReg(ArgGPRs); 3547 unsigned StackOffset = 3548 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3549 3550 // If we reach this point and PendingLocs is non-empty, we must be at the 3551 // end of a split argument that must be passed indirectly. 
3552 if (!PendingLocs.empty()) { 3553 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3554 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3555 3556 for (auto &It : PendingLocs) { 3557 if (Reg) 3558 It.convertToReg(Reg); 3559 else 3560 It.convertToMem(StackOffset); 3561 State.addLoc(It); 3562 } 3563 PendingLocs.clear(); 3564 PendingArgFlags.clear(); 3565 return false; 3566 } 3567 3568 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3569 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3570 "Expected an XLenVT or scalable vector types at this stage"); 3571 3572 if (Reg) { 3573 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3574 return false; 3575 } 3576 3577 // When a floating-point value is passed on the stack, no bit-conversion is 3578 // needed. 3579 if (ValVT.isFloatingPoint()) { 3580 LocVT = ValVT; 3581 LocInfo = CCValAssign::Full; 3582 } 3583 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3584 return false; 3585 } 3586 3587 template <typename ArgTy> 3588 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3589 for (const auto &ArgIdx : enumerate(Args)) { 3590 MVT ArgVT = ArgIdx.value().VT; 3591 if (ArgVT.isScalableVector() && 3592 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3593 return ArgIdx.index(); 3594 } 3595 return None; 3596 } 3597 3598 void RISCVTargetLowering::analyzeInputArgs( 3599 MachineFunction &MF, CCState &CCInfo, 3600 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3601 unsigned NumArgs = Ins.size(); 3602 FunctionType *FType = MF.getFunction().getFunctionType(); 3603 3604 Optional<unsigned> FirstMaskArgument; 3605 if (Subtarget.hasStdExtV()) 3606 FirstMaskArgument = preAssignMask(Ins); 3607 3608 for (unsigned i = 0; i != NumArgs; ++i) { 3609 MVT ArgVT = Ins[i].VT; 3610 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3611 3612 Type *ArgTy = nullptr; 3613 if (IsRet) 3614 ArgTy = FType->getReturnType(); 3615 else if (Ins[i].isOrigArg()) 3616 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3617 3618 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3619 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3620 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3621 FirstMaskArgument)) { 3622 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3623 << EVT(ArgVT).getEVTString() << '\n'); 3624 llvm_unreachable(nullptr); 3625 } 3626 } 3627 } 3628 3629 void RISCVTargetLowering::analyzeOutputArgs( 3630 MachineFunction &MF, CCState &CCInfo, 3631 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3632 CallLoweringInfo *CLI) const { 3633 unsigned NumArgs = Outs.size(); 3634 3635 Optional<unsigned> FirstMaskArgument; 3636 if (Subtarget.hasStdExtV()) 3637 FirstMaskArgument = preAssignMask(Outs); 3638 3639 for (unsigned i = 0; i != NumArgs; i++) { 3640 MVT ArgVT = Outs[i].VT; 3641 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3642 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3643 3644 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3645 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3646 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3647 FirstMaskArgument)) { 3648 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3649 << EVT(ArgVT).getEVTString() << "\n"); 3650 llvm_unreachable(nullptr); 3651 } 3652 } 3653 } 3654 3655 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3656 // values. 3657 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3658 const CCValAssign &VA, const SDLoc &DL) { 3659 switch (VA.getLocInfo()) { 3660 default: 3661 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3662 case CCValAssign::Full: 3663 break; 3664 case CCValAssign::BCvt: 3665 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3666 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3667 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3668 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3669 else 3670 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3671 break; 3672 } 3673 return Val; 3674 } 3675 3676 // The caller is responsible for loading the full value if the argument is 3677 // passed with CCValAssign::Indirect. 3678 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3679 const CCValAssign &VA, const SDLoc &DL, 3680 const RISCVTargetLowering &TLI) { 3681 MachineFunction &MF = DAG.getMachineFunction(); 3682 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3683 EVT LocVT = VA.getLocVT(); 3684 SDValue Val; 3685 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3686 Register VReg = RegInfo.createVirtualRegister(RC); 3687 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3688 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3689 3690 if (VA.getLocInfo() == CCValAssign::Indirect) 3691 return Val; 3692 3693 return convertLocVTToValVT(DAG, Val, VA, DL); 3694 } 3695 3696 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3697 const CCValAssign &VA, const SDLoc &DL) { 3698 EVT LocVT = VA.getLocVT(); 3699 3700 switch (VA.getLocInfo()) { 3701 default: 3702 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3703 case CCValAssign::Full: 3704 break; 3705 case CCValAssign::BCvt: 3706 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3707 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3708 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3709 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3710 else 3711 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3712 break; 3713 } 3714 return Val; 3715 } 3716 3717 // The caller is responsible for loading the full value if the argument is 3718 // passed with CCValAssign::Indirect. 
3719 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3720                                 const CCValAssign &VA, const SDLoc &DL) {
3721   MachineFunction &MF = DAG.getMachineFunction();
3722   MachineFrameInfo &MFI = MF.getFrameInfo();
3723   EVT LocVT = VA.getLocVT();
3724   EVT ValVT = VA.getValVT();
3725   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3726   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3727                                  VA.getLocMemOffset(), /*Immutable=*/true);
3728   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3729   SDValue Val;
3730 
3731   ISD::LoadExtType ExtType;
3732   switch (VA.getLocInfo()) {
3733   default:
3734     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3735   case CCValAssign::Full:
3736   case CCValAssign::Indirect:
3737   case CCValAssign::BCvt:
3738     ExtType = ISD::NON_EXTLOAD;
3739     break;
3740   }
3741   Val = DAG.getExtLoad(
3742       ExtType, DL, LocVT, Chain, FIN,
3743       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3744   return Val;
3745 }
3746 
3747 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3748                                        const CCValAssign &VA, const SDLoc &DL) {
3749   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3750          "Unexpected VA");
3751   MachineFunction &MF = DAG.getMachineFunction();
3752   MachineFrameInfo &MFI = MF.getFrameInfo();
3753   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3754 
3755   if (VA.isMemLoc()) {
3756     // f64 is passed on the stack.
3757     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3758     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3759     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3760                        MachinePointerInfo::getFixedStack(MF, FI));
3761   }
3762 
3763   assert(VA.isRegLoc() && "Expected register VA assignment");
3764 
3765   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3766   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3767   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3768   SDValue Hi;
3769   if (VA.getLocReg() == RISCV::X17) {
3770     // Second half of f64 is passed on the stack.
3771     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3772     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3773     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3774                      MachinePointerInfo::getFixedStack(MF, FI));
3775   } else {
3776     // Second half of f64 is passed in another GPR.
3777     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3778     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3779     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3780   }
3781   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3782 }
3783 
3784 // FastCC gives less than a 1% performance improvement on some particular
3785 // benchmarks, but in theory it may still benefit some cases.
3786 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3787                             CCValAssign::LocInfo LocInfo,
3788                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3789 
3790   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3791     // X5 and X6 might be used for the save-restore libcall.
3792 static const MCPhysReg GPRList[] = { 3793 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3794 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3795 RISCV::X29, RISCV::X30, RISCV::X31}; 3796 if (unsigned Reg = State.AllocateReg(GPRList)) { 3797 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3798 return false; 3799 } 3800 } 3801 3802 if (LocVT == MVT::f16) { 3803 static const MCPhysReg FPR16List[] = { 3804 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3805 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3806 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3807 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3808 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3809 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3810 return false; 3811 } 3812 } 3813 3814 if (LocVT == MVT::f32) { 3815 static const MCPhysReg FPR32List[] = { 3816 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3817 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3818 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3819 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3820 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3821 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3822 return false; 3823 } 3824 } 3825 3826 if (LocVT == MVT::f64) { 3827 static const MCPhysReg FPR64List[] = { 3828 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3829 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3830 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3831 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3832 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3833 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3834 return false; 3835 } 3836 } 3837 3838 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3839 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3840 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3841 return false; 3842 } 3843 3844 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3845 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3846 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3847 return false; 3848 } 3849 3850 return true; // CC didn't match. 3851 } 3852 3853 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3854 CCValAssign::LocInfo LocInfo, 3855 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3856 3857 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3858 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3859 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3860 static const MCPhysReg GPRList[] = { 3861 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3862 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3863 if (unsigned Reg = State.AllocateReg(GPRList)) { 3864 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3865 return false; 3866 } 3867 } 3868 3869 if (LocVT == MVT::f32) { 3870 // Pass in STG registers: F1, ..., F6 3871 // fs0 ... 
fs5
3872     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3873                                           RISCV::F18_F, RISCV::F19_F,
3874                                           RISCV::F20_F, RISCV::F21_F};
3875     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3876       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3877       return false;
3878     }
3879   }
3880 
3881   if (LocVT == MVT::f64) {
3882     // Pass in STG registers: D1, ..., D6
3883     //                        fs6 ... fs11
3884     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3885                                           RISCV::F24_D, RISCV::F25_D,
3886                                           RISCV::F26_D, RISCV::F27_D};
3887     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3888       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3889       return false;
3890     }
3891   }
3892 
3893   report_fatal_error("No registers left in GHC calling convention");
3894   return true;
3895 }
3896 
3897 // Transform physical registers into virtual registers.
3898 SDValue RISCVTargetLowering::LowerFormalArguments(
3899     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3900     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3901     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3902 
3903   MachineFunction &MF = DAG.getMachineFunction();
3904 
3905   switch (CallConv) {
3906   default:
3907     report_fatal_error("Unsupported calling convention");
3908   case CallingConv::C:
3909   case CallingConv::Fast:
3910     break;
3911   case CallingConv::GHC:
3912     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3913         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3914       report_fatal_error(
3915           "GHC calling convention requires the F and D instruction set extensions");
3916   }
3917 
3918   const Function &Func = MF.getFunction();
3919   if (Func.hasFnAttribute("interrupt")) {
3920     if (!Func.arg_empty())
3921       report_fatal_error(
3922           "Functions with the interrupt attribute cannot have arguments!");
3923 
3924     StringRef Kind =
3925         MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3926 
3927     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3928       report_fatal_error(
3929           "Function interrupt attribute argument not supported!");
3930   }
3931 
3932   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3933   MVT XLenVT = Subtarget.getXLenVT();
3934   unsigned XLenInBytes = Subtarget.getXLen() / 8;
3935   // Used with varargs to accumulate store chains.
3936   std::vector<SDValue> OutChains;
3937 
3938   // Assign locations to all of the incoming arguments.
3939   SmallVector<CCValAssign, 16> ArgLocs;
3940   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3941 
3942   if (CallConv == CallingConv::Fast)
3943     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3944   else if (CallConv == CallingConv::GHC)
3945     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3946   else
3947     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3948 
3949   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3950     CCValAssign &VA = ArgLocs[i];
3951     SDValue ArgValue;
3952     // Passing f64 on RV32D with a soft float ABI must be handled as a special
3953     // case.
3954     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3955       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3956     else if (VA.isRegLoc())
3957       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3958     else
3959       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3960 
3961     if (VA.getLocInfo() == CCValAssign::Indirect) {
3962       // If the original argument was split and passed by reference (e.g. i128
3963       // on RV32), we need to load all parts of it here (using the same
3964       // address).
3965       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3966                                    MachinePointerInfo()));
3967       unsigned ArgIndex = Ins[i].OrigArgIndex;
3968       assert(Ins[i].PartOffset == 0);
3969       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3970         CCValAssign &PartVA = ArgLocs[i + 1];
3971         unsigned PartOffset = Ins[i + 1].PartOffset;
3972         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3973                                       DAG.getIntPtrConstant(PartOffset, DL));
3974         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3975                                      MachinePointerInfo()));
3976         ++i;
3977       }
3978       continue;
3979     }
3980     InVals.push_back(ArgValue);
3981   }
3982 
3983   if (IsVarArg) {
3984     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3985     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3986     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3987     MachineFrameInfo &MFI = MF.getFrameInfo();
3988     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3989     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3990 
3991     // Offset of the first variable argument from stack pointer, and size of
3992     // the vararg save area. For now, the varargs save area is either zero or
3993     // large enough to hold a0-a7.
3994     int VaArgOffset, VarArgsSaveSize;
3995 
3996     // If all registers are allocated, then all varargs must be passed on the
3997     // stack and we don't need to save any argregs.
3998     if (ArgRegs.size() == Idx) {
3999       VaArgOffset = CCInfo.getNextStackOffset();
4000       VarArgsSaveSize = 0;
4001     } else {
4002       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
4003       VaArgOffset = -VarArgsSaveSize;
4004     }
4005 
4006     // Record the frame index of the first variable argument,
4007     // which is needed by VASTART.
4008     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4009     RVFI->setVarArgsFrameIndex(FI);
4010 
4011     // If saving an odd number of registers, then create an extra stack slot to
4012     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
4013     // offsets to even-numbered registers remain 2*XLEN-aligned.
4014     if (Idx % 2) {
4015       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
4016       VarArgsSaveSize += XLenInBytes;
4017     }
4018 
4019     // Copy the integer registers that may have been used for passing varargs
4020     // to the vararg save area.
4021     for (unsigned I = Idx; I < ArgRegs.size();
4022          ++I, VaArgOffset += XLenInBytes) {
4023       const Register Reg = RegInfo.createVirtualRegister(RC);
4024       RegInfo.addLiveIn(ArgRegs[I], Reg);
4025       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
4026       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4027       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4028       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4029                                    MachinePointerInfo::getFixedStack(MF, FI));
4030       cast<StoreSDNode>(Store.getNode())
4031           ->getMemOperand()
4032           ->setValue((Value *)nullptr);
4033       OutChains.push_back(Store);
4034     }
4035     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
4036   }
4037 
4038   // All stores are grouped in one node to allow the matching between
4039   // the size of Ins and InVals. This only happens for vararg functions.
4040   if (!OutChains.empty()) {
4041     OutChains.push_back(Chain);
4042     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4043   }
4044 
4045   return Chain;
4046 }
4047 
4048 /// isEligibleForTailCallOptimization - Check whether the call is eligible
4049 /// for tail call optimization.
4050 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 4051 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 4052 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4053 const SmallVector<CCValAssign, 16> &ArgLocs) const { 4054 4055 auto &Callee = CLI.Callee; 4056 auto CalleeCC = CLI.CallConv; 4057 auto &Outs = CLI.Outs; 4058 auto &Caller = MF.getFunction(); 4059 auto CallerCC = Caller.getCallingConv(); 4060 4061 // Exception-handling functions need a special set of instructions to 4062 // indicate a return to the hardware. Tail-calling another function would 4063 // probably break this. 4064 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 4065 // should be expanded as new function attributes are introduced. 4066 if (Caller.hasFnAttribute("interrupt")) 4067 return false; 4068 4069 // Do not tail call opt if the stack is used to pass parameters. 4070 if (CCInfo.getNextStackOffset() != 0) 4071 return false; 4072 4073 // Do not tail call opt if any parameters need to be passed indirectly. 4074 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 4075 // passed indirectly. So the address of the value will be passed in a 4076 // register, or if not available, then the address is put on the stack. In 4077 // order to pass indirectly, space on the stack often needs to be allocated 4078 // in order to store the value. In this case the CCInfo.getNextStackOffset() 4079 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 4080 // are passed CCValAssign::Indirect. 4081 for (auto &VA : ArgLocs) 4082 if (VA.getLocInfo() == CCValAssign::Indirect) 4083 return false; 4084 4085 // Do not tail call opt if either caller or callee uses struct return 4086 // semantics. 4087 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4088 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4089 if (IsCallerStructRet || IsCalleeStructRet) 4090 return false; 4091 4092 // Externally-defined functions with weak linkage should not be 4093 // tail-called. The behaviour of branch instructions in this situation (as 4094 // used for tail calls) is implementation-defined, so we cannot rely on the 4095 // linker replacing the tail call with a return. 4096 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4097 const GlobalValue *GV = G->getGlobal(); 4098 if (GV->hasExternalWeakLinkage()) 4099 return false; 4100 } 4101 4102 // The callee has to preserve all registers the caller needs to preserve. 4103 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4104 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4105 if (CalleeCC != CallerCC) { 4106 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4107 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4108 return false; 4109 } 4110 4111 // Byval parameters hand the function a pointer directly into the stack area 4112 // we want to reuse during a tail call. Working around this *is* possible 4113 // but less efficient and uglier in LowerCall. 4114 for (auto &Arg : Outs) 4115 if (Arg.Flags.isByVal()) 4116 return false; 4117 4118 return true; 4119 } 4120 4121 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4122 // and output parameter nodes. 
4123 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 4124 SmallVectorImpl<SDValue> &InVals) const { 4125 SelectionDAG &DAG = CLI.DAG; 4126 SDLoc &DL = CLI.DL; 4127 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4128 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4129 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4130 SDValue Chain = CLI.Chain; 4131 SDValue Callee = CLI.Callee; 4132 bool &IsTailCall = CLI.IsTailCall; 4133 CallingConv::ID CallConv = CLI.CallConv; 4134 bool IsVarArg = CLI.IsVarArg; 4135 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4136 MVT XLenVT = Subtarget.getXLenVT(); 4137 4138 MachineFunction &MF = DAG.getMachineFunction(); 4139 4140 // Analyze the operands of the call, assigning locations to each operand. 4141 SmallVector<CCValAssign, 16> ArgLocs; 4142 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4143 4144 if (CallConv == CallingConv::Fast) 4145 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 4146 else if (CallConv == CallingConv::GHC) 4147 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 4148 else 4149 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 4150 4151 // Check if it's really possible to do a tail call. 4152 if (IsTailCall) 4153 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4154 4155 if (IsTailCall) 4156 ++NumTailCalls; 4157 else if (CLI.CB && CLI.CB->isMustTailCall()) 4158 report_fatal_error("failed to perform tail call elimination on a call " 4159 "site marked musttail"); 4160 4161 // Get a count of how many bytes are to be pushed on the stack. 4162 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 4163 4164 // Create local copies for byval args 4165 SmallVector<SDValue, 8> ByValArgs; 4166 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4167 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4168 if (!Flags.isByVal()) 4169 continue; 4170 4171 SDValue Arg = OutVals[i]; 4172 unsigned Size = Flags.getByValSize(); 4173 Align Alignment = Flags.getNonZeroByValAlign(); 4174 4175 int FI = 4176 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4177 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4178 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 4179 4180 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4181 /*IsVolatile=*/false, 4182 /*AlwaysInline=*/false, IsTailCall, 4183 MachinePointerInfo(), MachinePointerInfo()); 4184 ByValArgs.push_back(FIPtr); 4185 } 4186 4187 if (!IsTailCall) 4188 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4189 4190 // Copy argument values to their designated locations. 4191 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 4192 SmallVector<SDValue, 8> MemOpChains; 4193 SDValue StackPtr; 4194 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4195 CCValAssign &VA = ArgLocs[i]; 4196 SDValue ArgValue = OutVals[i]; 4197 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4198 4199 // Handle passing f64 on RV32D with a soft float ABI as a special case. 4200 bool IsF64OnRV32DSoftABI = 4201 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 4202 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 4203 SDValue SplitF64 = DAG.getNode( 4204 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 4205 SDValue Lo = SplitF64.getValue(0); 4206 SDValue Hi = SplitF64.getValue(1); 4207 4208 Register RegLo = VA.getLocReg(); 4209 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 4210 4211 if (RegLo == RISCV::X17) { 4212 // Second half of f64 is passed on the stack. 
4213 // Work out the address of the stack slot. 4214 if (!StackPtr.getNode()) 4215 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4216 // Emit the store. 4217 MemOpChains.push_back( 4218 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 4219 } else { 4220 // Second half of f64 is passed in another GPR. 4221 assert(RegLo < RISCV::X31 && "Invalid register pair"); 4222 Register RegHigh = RegLo + 1; 4223 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 4224 } 4225 continue; 4226 } 4227 4228 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 4229 // as any other MemLoc. 4230 4231 // Promote the value if needed. 4232 // For now, only handle fully promoted and indirect arguments. 4233 if (VA.getLocInfo() == CCValAssign::Indirect) { 4234 // Store the argument in a stack slot and pass its address. 4235 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 4236 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4237 MemOpChains.push_back( 4238 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4239 MachinePointerInfo::getFixedStack(MF, FI))); 4240 // If the original argument was split (e.g. i128), we need 4241 // to store all parts of it here (and pass just one address). 4242 unsigned ArgIndex = Outs[i].OrigArgIndex; 4243 assert(Outs[i].PartOffset == 0); 4244 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4245 SDValue PartValue = OutVals[i + 1]; 4246 unsigned PartOffset = Outs[i + 1].PartOffset; 4247 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 4248 DAG.getIntPtrConstant(PartOffset, DL)); 4249 MemOpChains.push_back( 4250 DAG.getStore(Chain, DL, PartValue, Address, 4251 MachinePointerInfo::getFixedStack(MF, FI))); 4252 ++i; 4253 } 4254 ArgValue = SpillSlot; 4255 } else { 4256 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 4257 } 4258 4259 // Use local copy if it is a byval arg. 4260 if (Flags.isByVal()) 4261 ArgValue = ByValArgs[j++]; 4262 4263 if (VA.isRegLoc()) { 4264 // Queue up the argument copies and emit them at the end. 4265 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 4266 } else { 4267 assert(VA.isMemLoc() && "Argument not register or memory"); 4268 assert(!IsTailCall && "Tail call not allowed if stack is used " 4269 "for passing parameters"); 4270 4271 // Work out the address of the stack slot. 4272 if (!StackPtr.getNode()) 4273 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4274 SDValue Address = 4275 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 4276 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 4277 4278 // Emit the store. 4279 MemOpChains.push_back( 4280 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 4281 } 4282 } 4283 4284 // Join the stores, which are independent of one another. 4285 if (!MemOpChains.empty()) 4286 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 4287 4288 SDValue Glue; 4289 4290 // Build a sequence of copy-to-reg nodes, chained and glued together. 4291 for (auto &Reg : RegsToPass) { 4292 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 4293 Glue = Chain.getValue(1); 4294 } 4295 4296 // Validate that none of the argument registers have been marked as 4297 // reserved, if so report an error. Do the same for the return address if this 4298 // is not a tailcall. 
4299 validateCCReservedRegs(RegsToPass, MF); 4300 if (!IsTailCall && 4301 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 4302 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 4303 MF.getFunction(), 4304 "Return address register required, but has been reserved."}); 4305 4306 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 4307 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 4308 // split it and then direct call can be matched by PseudoCALL. 4309 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 4310 const GlobalValue *GV = S->getGlobal(); 4311 4312 unsigned OpFlags = RISCVII::MO_CALL; 4313 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 4314 OpFlags = RISCVII::MO_PLT; 4315 4316 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 4317 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4318 unsigned OpFlags = RISCVII::MO_CALL; 4319 4320 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 4321 nullptr)) 4322 OpFlags = RISCVII::MO_PLT; 4323 4324 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 4325 } 4326 4327 // The first call operand is the chain and the second is the target address. 4328 SmallVector<SDValue, 8> Ops; 4329 Ops.push_back(Chain); 4330 Ops.push_back(Callee); 4331 4332 // Add argument registers to the end of the list so that they are 4333 // known live into the call. 4334 for (auto &Reg : RegsToPass) 4335 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 4336 4337 if (!IsTailCall) { 4338 // Add a register mask operand representing the call-preserved registers. 4339 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4340 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 4341 assert(Mask && "Missing call preserved mask for calling convention"); 4342 Ops.push_back(DAG.getRegisterMask(Mask)); 4343 } 4344 4345 // Glue the call to the argument copies, if any. 4346 if (Glue.getNode()) 4347 Ops.push_back(Glue); 4348 4349 // Emit the call. 4350 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 4351 4352 if (IsTailCall) { 4353 MF.getFrameInfo().setHasTailCall(); 4354 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 4355 } 4356 4357 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 4358 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 4359 Glue = Chain.getValue(1); 4360 4361 // Mark the end of the call, which is glued to the call itself. 4362 Chain = DAG.getCALLSEQ_END(Chain, 4363 DAG.getConstant(NumBytes, DL, PtrVT, true), 4364 DAG.getConstant(0, DL, PtrVT, true), 4365 Glue, DL); 4366 Glue = Chain.getValue(1); 4367 4368 // Assign locations to each value returned by this call. 4369 SmallVector<CCValAssign, 16> RVLocs; 4370 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 4371 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 4372 4373 // Copy all of the result registers out of their specified physreg. 

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
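      // For example, under the ilp32 ABI an f64 return value is split by
      // RISCVISD::SplitF64 below and returned in the a0/a1 (X10/X11) pair.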
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP)
  NODE_NAME_CASE(VSLIDEDOWN)
  NODE_NAME_CASE(VID)
  NODE_NAME_CASE(VFNCVT_ROD)
  NODE_NAME_CASE(VECREDUCE_ADD)
  NODE_NAME_CASE(VECREDUCE_UMAX)
  NODE_NAME_CASE(VECREDUCE_SMAX)
  NODE_NAME_CASE(VECREDUCE_UMIN)
  NODE_NAME_CASE(VECREDUCE_SMIN)
  NODE_NAME_CASE(VECREDUCE_AND)
  NODE_NAME_CASE(VECREDUCE_OR)
  NODE_NAME_CASE(VECREDUCE_XOR)
  NODE_NAME_CASE(VECREDUCE_FADD)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
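
// Illustrative example of these constraints in LLVM IR (not taken from a
// specific test):
//   call i32 asm "addi $0, $1, $2", "=r,r,I"(i32 %x, i32 11)
// 'I' accepts a signed 12-bit immediate, 'J' the constant zero, 'K' an
// unsigned 5-bit immediate, 'f' a floating-point register, and 'A' a memory
// operand whose address is held in a single register.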

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating
  // point register type available, manually select floating point registers
  // here.
  //
  // The second case is the ABI name of the register, so that frontends can
  // also use the ABI names in register constraint lists.
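  //
  // For example, the constraints "{f10}" and "{fa0}" both resolve to the same
  // physical register via the table below (illustrative):
  //   call void asm sideeffect "", "{fa0}"(float %val)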
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
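      // Only constants in the range [0, 31] are accepted here; anything else
      // is not materialised as an immediate operand.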
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
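
// Sketch of how the atomics hooks above fit together (illustrative, assuming
// the generic AtomicExpandPass behaviour): an operation such as
//   %old = atomicrmw add i8* %p, i8 %v monotonic
// is rewritten in terms of an aligned i32/i64 access plus a mask and shift
// amount, and that masked form is then lowered via the
// llvm.riscv.masked.atomicrmw.* intrinsics returned by the helper above.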

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the sub target has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the sub target has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
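  // For example, a <6 x i32> fixed-length type is currently rejected by the
  // power-of-two check below.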
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm