//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
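  // XLenVT is i32 on RV32 and i64 on RV64 and always lives in GPRs; the FP
  // register classes below are only added when the matching extension
  // (Zfh/F/D) is available.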
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    } else {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
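      // For extensions from a mask, the extension value passed to
      // lowerVectorMaskExt is +1 for zero/any-extend and -1 for sign-extend
      // (see the ISD::*_EXTEND cases in LowerOperation below).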
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);
      }
    }
  }

  // Function alignments.
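  // With the compressed (C) extension instructions are 2-byte aligned;
  // otherwise the minimum function alignment is 4 bytes.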
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() && VT.isScalableVector())
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
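// For example, `trunc i64 %x to i32` on RV32 needs no instruction at all; the
// register already holding the low 32 bits of %x is used directly.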
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

// Return the largest legal scalable vector type that matches VT's element
// type.
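// For example, if getLMULForFixedLengthVector returns 2 for a v8i32, the
// container type is nxv4i32 (the i32 case below uses LMul * 2 elements).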
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  switch (VT.getVectorElementType().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i8:
    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
  case MVT::i16:
    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
  case MVT::i32:
    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
  case MVT::i64:
    return MVT::getScalableVectorVT(MVT::i64, LMul);
  case MVT::f16:
    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
  case MVT::f32:
    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
  case MVT::f64:
    return MVT::getScalableVectorVT(MVT::f64, LMul);
  }
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
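  // For example, (build_vector 0, 1, undef, 3) still counts as the index
  // sequence 0, 1, 2, 3 and is lowered to the vid instruction below.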
  bool IsVID = true;
  if (VT.isInteger())
    for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
      IsVID &= Op.getOperand(i).isUndef() ||
               (isa<ConstantSDNode>(Op.getOperand(i)) &&
                Op.getConstantOperandVal(i) == i);

  if (IsVID) {
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
    return convertFromScalableVector(VT, VID, DAG, Subtarget);
  }

  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      MVT XLenVT = Subtarget.getXLenVT();
      SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
    return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
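    // For example (assuming VLEN=128 purely for illustration): VLENB is 16,
    // so vscale = 16 >> 3 = 2 and ISD::VSCALE(4) lowers to 2 * 4 = 8.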
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FP_EXTEND: {
    // RVV can only do fp_extend to types double the size as the source. We
    // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
    // via f32.
    MVT VT = Op.getSimpleValueType();
    MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap between vXf16->vXf64.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
        SrcVT.getVectorElementType() != MVT::f16)
      return Op;
    SDLoc DL(Op);
    MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
    SDValue IntermediateRound =
        DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
    return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
  }
  case ISD::FP_ROUND: {
    // RVV can only do fp_round to types half the size as the source. We
    // custom-lower f64->f16 rounds via RVV's round-to-odd float
    // conversion instruction.
    MVT VT = Op.getSimpleValueType();
    MVT SrcVT = Op.getOperand(0).getSimpleValueType();
    // We only need to close the gap between vXf64<->vXf16.
    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
        SrcVT.getVectorElementType() != MVT::f64)
      return Op;
    SDLoc DL(Op);
    MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
    SDValue IntermediateRound =
        DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
    return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");
    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
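        // For example, nxv2i64 -> nxv2f16 becomes an int_to_fp to nxv2f32
        // followed by an FP_ROUND to nxv2f16.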
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT =
          MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
                           VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    return Op;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::AND:
    return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
  case ISD::OR:
    return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  }
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
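  // For example, a reference to (@g + 8) is emitted as (add <lowered @g>, 8)
  // rather than folding the +8 into the address node itself.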
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
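  // __tls_get_addr is provided by the TLS runtime and returns the address of
  // the thread-local variable for the calling thread.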
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }
1501 // Return the value of the return address register, marking it an implicit 1502 // live-in. 1503 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 1504 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 1505 } 1506 1507 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 1508 SelectionDAG &DAG) const { 1509 SDLoc DL(Op); 1510 SDValue Lo = Op.getOperand(0); 1511 SDValue Hi = Op.getOperand(1); 1512 SDValue Shamt = Op.getOperand(2); 1513 EVT VT = Lo.getValueType(); 1514 1515 // if Shamt-XLEN < 0: // Shamt < XLEN 1516 // Lo = Lo << Shamt 1517 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 1518 // else: 1519 // Lo = 0 1520 // Hi = Lo << (Shamt-XLEN) 1521 1522 SDValue Zero = DAG.getConstant(0, DL, VT); 1523 SDValue One = DAG.getConstant(1, DL, VT); 1524 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1525 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1526 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1527 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1528 1529 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 1530 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 1531 SDValue ShiftRightLo = 1532 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 1533 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 1534 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 1535 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 1536 1537 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1538 1539 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 1540 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1541 1542 SDValue Parts[2] = {Lo, Hi}; 1543 return DAG.getMergeValues(Parts, DL); 1544 } 1545 1546 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1547 bool IsSRA) const { 1548 SDLoc DL(Op); 1549 SDValue Lo = Op.getOperand(0); 1550 SDValue Hi = Op.getOperand(1); 1551 SDValue Shamt = Op.getOperand(2); 1552 EVT VT = Lo.getValueType(); 1553 1554 // SRA expansion: 1555 // if Shamt-XLEN < 0: // Shamt < XLEN 1556 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1557 // Hi = Hi >>s Shamt 1558 // else: 1559 // Lo = Hi >>s (Shamt-XLEN); 1560 // Hi = Hi >>s (XLEN-1) 1561 // 1562 // SRL expansion: 1563 // if Shamt-XLEN < 0: // Shamt < XLEN 1564 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1565 // Hi = Hi >>u Shamt 1566 // else: 1567 // Lo = Hi >>u (Shamt-XLEN); 1568 // Hi = 0; 1569 1570 unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; 1571 1572 SDValue Zero = DAG.getConstant(0, DL, VT); 1573 SDValue One = DAG.getConstant(1, DL, VT); 1574 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1575 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1576 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1577 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1578 1579 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1580 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1581 SDValue ShiftLeftHi = 1582 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1583 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1584 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1585 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1586 SDValue HiFalse = 1587 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 1588 1589 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 1590 1591 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 1592 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 1593 1594 SDValue Parts[2] = {Lo, Hi}; 1595 return DAG.getMergeValues(Parts, DL); 1596 } 1597 1598 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is 1599 // illegal (currently only vXi64 RV32). 1600 // FIXME: We could also catch non-constant sign-extended i32 values and lower 1601 // them to SPLAT_VECTOR_I64 1602 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op, 1603 SelectionDAG &DAG) const { 1604 SDLoc DL(Op); 1605 EVT VecVT = Op.getValueType(); 1606 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 1607 "Unexpected SPLAT_VECTOR lowering"); 1608 SDValue SplatVal = Op.getOperand(0); 1609 1610 // If we can prove that the value is a sign-extended 32-bit value, lower this 1611 // as a custom node in order to try and match RVV vector/scalar instructions. 
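// For example, a splat of the constant -1 is a sign-extended 32-bit value, so
// it becomes a single SPLAT_VECTOR_I64 node and can later be matched to
// vmv.v.i/vmv.v.x rather than the six-instruction sequence below.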
1612 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) { 1613 if (isInt<32>(CVal->getSExtValue())) 1614 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1615 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32)); 1616 } 1617 1618 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND && 1619 SplatVal.getOperand(0).getValueType() == MVT::i32) { 1620 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1621 SplatVal.getOperand(0)); 1622 } 1623 1624 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not 1625 // to accidentally sign-extend the 32-bit halves to the e64 SEW: 1626 // vmv.v.x vX, hi 1627 // vsll.vx vX, vX, /*32*/ 1628 // vmv.v.x vY, lo 1629 // vsll.vx vY, vY, /*32*/ 1630 // vsrl.vx vY, vY, /*32*/ 1631 // vor.vv vX, vX, vY 1632 SDValue One = DAG.getConstant(1, DL, MVT::i32); 1633 SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 1634 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT); 1635 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero); 1636 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One); 1637 1638 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 1639 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV); 1640 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV); 1641 1642 if (isNullConstant(Hi)) 1643 return Lo; 1644 1645 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi); 1646 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV); 1647 1648 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi); 1649 } 1650 1651 // Custom-lower extensions from mask vectors by using a vselect either with 1 1652 // for zero/any-extension or -1 for sign-extension: 1653 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 1654 // Note that any-extension is lowered identically to zero-extension. 1655 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 1656 int64_t ExtTrueVal) const { 1657 SDLoc DL(Op); 1658 EVT VecVT = Op.getValueType(); 1659 SDValue Src = Op.getOperand(0); 1660 // Only custom-lower extensions from mask types 1661 if (!Src.getValueType().isVector() || 1662 Src.getValueType().getVectorElementType() != MVT::i1) 1663 return Op; 1664 1665 // Be careful not to introduce illegal scalar types at this stage, and be 1666 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1667 // illegal and must be expanded. Since we know that the constants are 1668 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 
1669 bool IsRV32E64 = 1670 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1671 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1672 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT()); 1673 1674 if (!IsRV32E64) { 1675 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1676 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 1677 } else { 1678 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1679 SplatTrueVal = 1680 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 1681 } 1682 1683 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 1684 } 1685 1686 // Custom-lower truncations from vectors to mask vectors by using a mask and a 1687 // setcc operation: 1688 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 1689 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 1690 SelectionDAG &DAG) const { 1691 SDLoc DL(Op); 1692 EVT MaskVT = Op.getValueType(); 1693 // Only expect to custom-lower truncations to mask types 1694 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 1695 "Unexpected type for vector mask lowering"); 1696 SDValue Src = Op.getOperand(0); 1697 EVT VecVT = Src.getValueType(); 1698 1699 // Be careful not to introduce illegal scalar types at this stage, and be 1700 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 1701 // illegal and must be expanded. Since we know that the constants are 1702 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 1703 bool IsRV32E64 = 1704 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 1705 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 1706 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1707 1708 if (!IsRV32E64) { 1709 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne); 1710 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 1711 } else { 1712 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne); 1713 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 1714 } 1715 1716 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 1717 1718 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 1719 } 1720 1721 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1722 SelectionDAG &DAG) const { 1723 SDLoc DL(Op); 1724 EVT VecVT = Op.getValueType(); 1725 SDValue Vec = Op.getOperand(0); 1726 SDValue Val = Op.getOperand(1); 1727 SDValue Idx = Op.getOperand(2); 1728 1729 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is 1730 // first slid down into position, the value is inserted into the first 1731 // position, and the vector is slid back up. We do this to simplify patterns. 1732 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx), 1733 if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) { 1734 if (isNullConstant(Idx)) 1735 return Op; 1736 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1737 DAG.getUNDEF(VecVT), Vec, Idx); 1738 SDValue InsertElt0 = 1739 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val, 1740 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1741 1742 return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx); 1743 } 1744 1745 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type 1746 // is illegal (currently only vXi64 RV32). 
1747 // Since there is no easy way of getting a single element into a vector when 1748 // XLEN<SEW, we lower the operation to the following sequence: 1749 // splat vVal, rVal 1750 // vid.v vVid 1751 // vmseq.vx mMask, vVid, rIdx 1752 // vmerge.vvm vDest, vSrc, vVal, mMask 1753 // This essentially merges the original vector with the inserted element by 1754 // using a mask whose only set bit is that corresponding to the insert 1755 // index. 1756 SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val); 1757 SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx); 1758 1759 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 1760 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 1761 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 1762 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL); 1763 auto SetCCVT = 1764 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT); 1765 SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ); 1766 1767 return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec); 1768 } 1769 1770 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 1771 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 1772 // types this is done using VMV_X_S to allow us to glean information about the 1773 // sign bits of the result. 1774 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 1775 SelectionDAG &DAG) const { 1776 SDLoc DL(Op); 1777 SDValue Idx = Op.getOperand(1); 1778 SDValue Vec = Op.getOperand(0); 1779 EVT EltVT = Op.getValueType(); 1780 EVT VecVT = Vec.getValueType(); 1781 MVT XLenVT = Subtarget.getXLenVT(); 1782 1783 // If the index is 0, the vector is already in the right position. 1784 if (!isNullConstant(Idx)) { 1785 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec, 1786 Idx); 1787 } 1788 1789 if (!EltVT.isInteger()) { 1790 // Floating-point extracts are handled in TableGen. 1791 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 1792 DAG.getConstant(0, DL, XLenVT)); 1793 } 1794 1795 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 1796 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 1797 } 1798 1799 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 1800 SelectionDAG &DAG) const { 1801 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1802 SDLoc DL(Op); 1803 1804 if (Subtarget.hasStdExtV()) { 1805 // Some RVV intrinsics may claim that they want an integer operand to be 1806 // extended. 1807 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 1808 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 1809 if (II->ExtendedOperand) { 1810 assert(II->ExtendedOperand < Op.getNumOperands()); 1811 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 1812 SDValue &ScalarOp = Operands[II->ExtendedOperand]; 1813 EVT OpVT = ScalarOp.getValueType(); 1814 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 1815 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 1816 // If the operand is a constant, sign extend to increase our chances 1817 // of being able to use a .vi instruction. ANY_EXTEND would become 1818 // a zero extend and the simm5 check in isel would fail. 1819 // FIXME: Should we ignore the upper bits in isel instead? 1820 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ?
ISD::SIGN_EXTEND 1821 : ISD::ANY_EXTEND; 1822 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 1823 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), 1824 Operands); 1825 } 1826 } 1827 } 1828 } 1829 1830 switch (IntNo) { 1831 default: 1832 return SDValue(); // Don't custom lower most intrinsics. 1833 case Intrinsic::thread_pointer: { 1834 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1835 return DAG.getRegister(RISCV::X4, PtrVT); 1836 } 1837 case Intrinsic::riscv_vmv_x_s: 1838 assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!"); 1839 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 1840 Op.getOperand(1)); 1841 case Intrinsic::riscv_vmv_v_x: { 1842 SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), 1843 Op.getOperand(1)); 1844 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), 1845 Scalar, Op.getOperand(2)); 1846 } 1847 case Intrinsic::riscv_vfmv_v_f: 1848 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 1849 Op.getOperand(1), Op.getOperand(2)); 1850 } 1851 } 1852 1853 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 1854 SelectionDAG &DAG) const { 1855 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1856 SDLoc DL(Op); 1857 1858 if (Subtarget.hasStdExtV()) { 1859 // Some RVV intrinsics may claim that they want an integer operand to be 1860 // extended. 1861 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 1862 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { 1863 if (II->ExtendedOperand) { 1864 // The operands start from the second argument in INTRINSIC_W_CHAIN. 1865 unsigned ExtendOp = II->ExtendedOperand + 1; 1866 assert(ExtendOp < Op.getNumOperands()); 1867 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 1868 SDValue &ScalarOp = Operands[ExtendOp]; 1869 EVT OpVT = ScalarOp.getValueType(); 1870 if (OpVT == MVT::i8 || OpVT == MVT::i16 || 1871 (OpVT == MVT::i32 && Subtarget.is64Bit())) { 1872 // If the operand is a constant, sign extend to increase our chances 1873 // of being able to use a .vi instruction. ANY_EXTEND would become 1874 // a zero extend and the simm5 check in isel would fail. 1875 // FIXME: Should we ignore the upper bits in isel instead? 1876 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND 1877 : ISD::ANY_EXTEND; 1878 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); 1879 return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(), 1880 Operands); 1881 } 1882 } 1883 } 1884 } 1885 1886 switch (IntNo) { 1887 default: 1888 return SDValue(); // Don't custom lower most intrinsics.
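// riscv_vleff is the unit-stride fault-only-first load: besides the loaded
// data it also updates vl, so the lowering below pairs the VLEFF node with a
// PseudoReadVL that returns the new vl value as the intrinsic's second result.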
1889 case Intrinsic::riscv_vleff: { 1890 SDLoc DL(Op); 1891 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1892 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), 1893 Op.getOperand(2), Op.getOperand(3)); 1894 SDValue ReadVL = 1895 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1896 Load.getValue(2)), 1897 0); 1898 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1899 } 1900 case Intrinsic::riscv_vleff_mask: { 1901 SDLoc DL(Op); 1902 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); 1903 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), 1904 Op.getOperand(2), Op.getOperand(3), 1905 Op.getOperand(4), Op.getOperand(5)); 1906 SDValue ReadVL = 1907 SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1), 1908 Load.getValue(2)), 1909 0); 1910 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); 1911 } 1912 } 1913 } 1914 1915 static std::pair<unsigned, uint64_t> 1916 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) { 1917 switch (ISDOpcode) { 1918 default: 1919 llvm_unreachable("Unhandled reduction"); 1920 case ISD::VECREDUCE_ADD: 1921 return {RISCVISD::VECREDUCE_ADD, 0}; 1922 case ISD::VECREDUCE_UMAX: 1923 return {RISCVISD::VECREDUCE_UMAX, 0}; 1924 case ISD::VECREDUCE_SMAX: 1925 return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)}; 1926 case ISD::VECREDUCE_UMIN: 1927 return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)}; 1928 case ISD::VECREDUCE_SMIN: 1929 return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)}; 1930 case ISD::VECREDUCE_AND: 1931 return {RISCVISD::VECREDUCE_AND, -1}; 1932 case ISD::VECREDUCE_OR: 1933 return {RISCVISD::VECREDUCE_OR, 0}; 1934 case ISD::VECREDUCE_XOR: 1935 return {RISCVISD::VECREDUCE_XOR, 0}; 1936 } 1937 } 1938 1939 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV 1940 // reduction opcode. Note that this returns a vector type, which must be 1941 // further processed to access the scalar result in element 0. 1942 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 1943 SelectionDAG &DAG) const { 1944 SDLoc DL(Op); 1945 assert(Op.getValueType().isSimple() && 1946 Op.getOperand(0).getValueType().isSimple() && 1947 "Unexpected vector-reduce lowering"); 1948 MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 1949 unsigned RVVOpcode; 1950 uint64_t IdentityVal; 1951 std::tie(RVVOpcode, IdentityVal) = 1952 getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits()); 1953 // We have to perform a bit of a dance to get from our vector type to the 1954 // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector 1955 // element type to find the type which fills a single register. Be careful to 1956 // use the operand's vector element type rather than the reduction's value 1957 // type, as that has likely been extended to XLEN. 
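// For example, with i32 elements this gives NumElts = 64 / 32 = 2, so the
// reduction is performed in the LMUL=1 type nxv2i32.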
1958 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 1959 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 1960 SDValue IdentitySplat = 1961 DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT)); 1962 SDValue Reduction = 1963 DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat); 1964 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 1965 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 1966 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 1967 } 1968 1969 // Given a reduction op, this function returns the matching reduction opcode, 1970 // the vector SDValue and the scalar SDValue required to lower this to a 1971 // RISCVISD node. 1972 static std::tuple<unsigned, SDValue, SDValue> 1973 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 1974 SDLoc DL(Op); 1975 switch (Op.getOpcode()) { 1976 default: 1977 llvm_unreachable("Unhandled reduction"); 1978 case ISD::VECREDUCE_FADD: 1979 return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0), 1980 DAG.getConstantFP(0.0, DL, EltVT)); 1981 case ISD::VECREDUCE_SEQ_FADD: 1982 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), 1983 Op.getOperand(0)); 1984 } 1985 } 1986 1987 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 1988 SelectionDAG &DAG) const { 1989 SDLoc DL(Op); 1990 MVT VecEltVT = Op.getSimpleValueType(); 1991 // We have to perform a bit of a dance to get from our vector type to the 1992 // correct LMUL=1 vector type. See above for an explanation. 1993 unsigned NumElts = 64 / VecEltVT.getSizeInBits(); 1994 MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts); 1995 1996 unsigned RVVOpcode; 1997 SDValue VectorVal, ScalarVal; 1998 std::tie(RVVOpcode, VectorVal, ScalarVal) = 1999 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 2000 2001 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 2002 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat); 2003 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 2004 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 2005 } 2006 2007 SDValue 2008 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 2009 SelectionDAG &DAG) const { 2010 auto *Load = cast<LoadSDNode>(Op); 2011 2012 SDLoc DL(Op); 2013 MVT VT = Op.getSimpleValueType(); 2014 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2015 2016 SDValue VL = 2017 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2018 2019 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 2020 SDValue NewLoad = DAG.getMemIntrinsicNode( 2021 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 2022 Load->getMemoryVT(), Load->getMemOperand()); 2023 2024 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 2025 return DAG.getMergeValues({Result, Load->getChain()}, DL); 2026 } 2027 2028 SDValue 2029 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 2030 SelectionDAG &DAG) const { 2031 auto *Store = cast<StoreSDNode>(Op); 2032 2033 SDLoc DL(Op); 2034 MVT VT = Store->getValue().getSimpleValueType(); 2035 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2036 2037 SDValue VL = 2038 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2039 2040 SDValue NewValue = 2041 convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); 2042 return DAG.getMemIntrinsicNode( 2043 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 2044 
{Store->getChain(), NewValue, Store->getBasePtr(), VL}, 2045 Store->getMemoryVT(), Store->getMemOperand()); 2046 } 2047 2048 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 2049 unsigned NewOpc) const { 2050 MVT VT = Op.getSimpleValueType(); 2051 assert(useRVVForFixedLengthVectorVT(VT) && 2052 "Only expected to lower fixed length vector operation!"); 2053 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2054 2055 // Create list of operands by converting existing ones to scalable types. 2056 SmallVector<SDValue, 6> Ops; 2057 for (const SDValue &V : Op->op_values()) { 2058 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 2059 2060 // Pass through non-vector operands. 2061 if (!V.getValueType().isVector()) { 2062 Ops.push_back(V); 2063 continue; 2064 } 2065 2066 // "cast" fixed length vector to a scalable vector. 2067 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 2068 "Only fixed length vectors are supported!"); 2069 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 2070 } 2071 2072 SDLoc DL(Op); 2073 SDValue VL = 2074 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 2075 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2076 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2077 Ops.push_back(Mask); 2078 Ops.push_back(VL); 2079 2080 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops); 2081 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 2082 } 2083 2084 // Returns the opcode of the target-specific SDNode that implements the 32-bit 2085 // form of the given Opcode. 2086 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 2087 switch (Opcode) { 2088 default: 2089 llvm_unreachable("Unexpected opcode"); 2090 case ISD::SHL: 2091 return RISCVISD::SLLW; 2092 case ISD::SRA: 2093 return RISCVISD::SRAW; 2094 case ISD::SRL: 2095 return RISCVISD::SRLW; 2096 case ISD::SDIV: 2097 return RISCVISD::DIVW; 2098 case ISD::UDIV: 2099 return RISCVISD::DIVUW; 2100 case ISD::UREM: 2101 return RISCVISD::REMUW; 2102 case ISD::ROTL: 2103 return RISCVISD::ROLW; 2104 case ISD::ROTR: 2105 return RISCVISD::RORW; 2106 case RISCVISD::GREVI: 2107 return RISCVISD::GREVIW; 2108 case RISCVISD::GORCI: 2109 return RISCVISD::GORCIW; 2110 } 2111 } 2112 2113 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 2114 // Because i32 isn't a legal type for RV64, these operations would otherwise 2115 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 2116 // later on, because the fact that the operation was originally of type i32 is 2117 // lost. 2118 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 2119 unsigned ExtOpc = ISD::ANY_EXTEND) { 2120 SDLoc DL(N); 2121 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2122 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 2123 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 2124 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 2125 // ReplaceNodeResults requires we maintain the same type for the return value. 2126 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 2127 } 2128 2129 // Converts the given 32-bit operation to an i64 operation with sign-extension 2130 // semantics, so that fewer sign-extension instructions need to be emitted.
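// For example, an i32 ADD is rewritten as
//   (trunc (sext_inreg (add (any_extend a), (any_extend b)), i32))
// which instruction selection can then match to ADDW; the ADDW result is
// already sign-extended, avoiding an extra sext.w.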
2131 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 2132 SDLoc DL(N); 2133 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2134 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2135 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 2136 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 2137 DAG.getValueType(MVT::i32)); 2138 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 2139 } 2140 2141 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 2142 SmallVectorImpl<SDValue> &Results, 2143 SelectionDAG &DAG) const { 2144 SDLoc DL(N); 2145 switch (N->getOpcode()) { 2146 default: 2147 llvm_unreachable("Don't know how to custom type legalize this operation!"); 2148 case ISD::STRICT_FP_TO_SINT: 2149 case ISD::STRICT_FP_TO_UINT: 2150 case ISD::FP_TO_SINT: 2151 case ISD::FP_TO_UINT: { 2152 bool IsStrict = N->isStrictFPOpcode(); 2153 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2154 "Unexpected custom legalisation"); 2155 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 2156 // If the FP type needs to be softened, emit a library call using the 'si' 2157 // version. If we left it to default legalization we'd end up with 'di'. If 2158 // the FP type doesn't need to be softened just let generic type 2159 // legalization promote the result type. 2160 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 2161 TargetLowering::TypeSoftenFloat) 2162 return; 2163 RTLIB::Libcall LC; 2164 if (N->getOpcode() == ISD::FP_TO_SINT || 2165 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 2166 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 2167 else 2168 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 2169 MakeLibCallOptions CallOptions; 2170 EVT OpVT = Op0.getValueType(); 2171 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 2172 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 2173 SDValue Result; 2174 std::tie(Result, Chain) = 2175 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 2176 Results.push_back(Result); 2177 if (IsStrict) 2178 Results.push_back(Chain); 2179 break; 2180 } 2181 case ISD::READCYCLECOUNTER: { 2182 assert(!Subtarget.is64Bit() && 2183 "READCYCLECOUNTER only has custom type legalization on riscv32"); 2184 2185 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 2186 SDValue RCW = 2187 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 2188 2189 Results.push_back( 2190 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 2191 Results.push_back(RCW.getValue(2)); 2192 break; 2193 } 2194 case ISD::ADD: 2195 case ISD::SUB: 2196 case ISD::MUL: 2197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2198 "Unexpected custom legalisation"); 2199 if (N->getOperand(1).getOpcode() == ISD::Constant) 2200 return; 2201 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2202 break; 2203 case ISD::SHL: 2204 case ISD::SRA: 2205 case ISD::SRL: 2206 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2207 "Unexpected custom legalisation"); 2208 if (N->getOperand(1).getOpcode() == ISD::Constant) 2209 return; 2210 Results.push_back(customLegalizeToWOp(N, DAG)); 2211 break; 2212 case ISD::ROTL: 2213 case ISD::ROTR: 2214 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2215 "Unexpected custom legalisation"); 2216 Results.push_back(customLegalizeToWOp(N, DAG)); 2217 break; 2218 case ISD::SDIV: 2219 case ISD::UDIV: 2220 case ISD::UREM: { 2221 MVT VT = N->getSimpleValueType(0); 2222 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 2223 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 2224 "Unexpected custom legalisation"); 2225 if (N->getOperand(0).getOpcode() == ISD::Constant || 2226 N->getOperand(1).getOpcode() == ISD::Constant) 2227 return; 2228 2229 // If the input is i32, use ANY_EXTEND since the W instructions don't read 2230 // the upper 32 bits. For other types we need to sign or zero extend 2231 // based on the opcode. 2232 unsigned ExtOpc = ISD::ANY_EXTEND; 2233 if (VT != MVT::i32) 2234 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 2235 : ISD::ZERO_EXTEND; 2236 2237 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 2238 break; 2239 } 2240 case ISD::BITCAST: { 2241 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2242 Subtarget.hasStdExtF()) || 2243 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 2244 "Unexpected custom legalisation"); 2245 SDValue Op0 = N->getOperand(0); 2246 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 2247 if (Op0.getValueType() != MVT::f16) 2248 return; 2249 SDValue FPConv = 2250 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 2251 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 2252 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2253 Subtarget.hasStdExtF()) { 2254 if (Op0.getValueType() != MVT::f32) 2255 return; 2256 SDValue FPConv = 2257 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 2258 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 2259 } 2260 break; 2261 } 2262 case RISCVISD::GREVI: 2263 case RISCVISD::GORCI: { 2264 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2265 "Unexpected custom legalisation"); 2266 // This is similar to customLegalizeToWOp, except that we pass the second 2267 // operand (a TargetConstant) straight through: it is already of type 2268 // XLenVT. 2269 SDLoc DL(N); 2270 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 2271 SDValue NewOp0 = 2272 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2273 SDValue NewRes = 2274 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 2275 // ReplaceNodeResults requires we maintain the same type for the return 2276 // value. 2277 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 2278 break; 2279 } 2280 case ISD::BSWAP: 2281 case ISD::BITREVERSE: { 2282 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2283 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 2284 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2285 N->getOperand(0)); 2286 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 2287 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 2288 DAG.getTargetConstant(Imm, DL, 2289 Subtarget.getXLenVT())); 2290 // ReplaceNodeResults requires we maintain the same type for the return 2291 // value. 2292 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 2293 break; 2294 } 2295 case ISD::FSHL: 2296 case ISD::FSHR: { 2297 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2298 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 2299 SDValue NewOp0 = 2300 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2301 SDValue NewOp1 = 2302 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2303 SDValue NewOp2 = 2304 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 2305 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 2306 // Mask the shift amount to 5 bits. 2307 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 2308 DAG.getConstant(0x1f, DL, MVT::i64)); 2309 unsigned Opc = 2310 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 2311 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 2312 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 2313 break; 2314 } 2315 case ISD::EXTRACT_VECTOR_ELT: { 2316 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 2317 // type is illegal (currently only vXi64 RV32). 2318 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 2319 // transferred to the destination register. We issue two of these from the 2320 // upper- and lower- halves of the SEW-bit vector element, slid down to the 2321 // first element. 2322 SDLoc DL(N); 2323 SDValue Vec = N->getOperand(0); 2324 SDValue Idx = N->getOperand(1); 2325 EVT VecVT = Vec.getValueType(); 2326 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 2327 VecVT.getVectorElementType() == MVT::i64 && 2328 "Unexpected EXTRACT_VECTOR_ELT legalization"); 2329 2330 SDValue Slidedown = Vec; 2331 // Unless the index is known to be 0, we must slide the vector down to get 2332 // the desired element into index 0. 2333 if (!isNullConstant(Idx)) 2334 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 2335 DAG.getUNDEF(VecVT), Vec, Idx); 2336 2337 MVT XLenVT = Subtarget.getXLenVT(); 2338 // Extract the lower XLEN bits of the correct vector element. 2339 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 2340 2341 // To extract the upper XLEN bits of the vector element, shift the first 2342 // element right by 32 bits and re-extract the lower XLEN bits. 2343 SDValue ThirtyTwoV = 2344 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 2345 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 2346 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 2347 2348 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 2349 2350 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 2351 break; 2352 } 2353 case ISD::INTRINSIC_WO_CHAIN: { 2354 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2355 switch (IntNo) { 2356 default: 2357 llvm_unreachable( 2358 "Don't know how to custom type legalize this intrinsic!"); 2359 case Intrinsic::riscv_vmv_x_s: { 2360 EVT VT = N->getValueType(0); 2361 assert((VT == MVT::i8 || VT == MVT::i16 || 2362 (Subtarget.is64Bit() && VT == MVT::i32)) && 2363 "Unexpected custom legalisation!"); 2364 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 2365 Subtarget.getXLenVT(), N->getOperand(1)); 2366 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 2367 break; 2368 } 2369 } 2370 break; 2371 } 2372 case ISD::VECREDUCE_ADD: 2373 case ISD::VECREDUCE_AND: 2374 case ISD::VECREDUCE_OR: 2375 case ISD::VECREDUCE_XOR: 2376 case ISD::VECREDUCE_SMAX: 2377 case ISD::VECREDUCE_UMAX: 2378 case ISD::VECREDUCE_SMIN: 2379 case ISD::VECREDUCE_UMIN: 2380 // The custom-lowering for these nodes returns a vector whose first element 2381 // is the result of the reduction. Extract its first element and let the 2382 // legalization for EXTRACT_VECTOR_ELT do the rest of the job. 2383 Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); 2384 break; 2385 } 2386 } 2387 2388 // A structure to hold one of the bit-manipulation patterns below. 
Together, a 2389 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 2390 // (or (and (shl x, 1), 0xAAAAAAAA), 2391 // (and (srl x, 1), 0x55555555)) 2392 struct RISCVBitmanipPat { 2393 SDValue Op; 2394 unsigned ShAmt; 2395 bool IsSHL; 2396 2397 bool formsPairWith(const RISCVBitmanipPat &Other) const { 2398 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 2399 } 2400 }; 2401 2402 // Matches any of the following bit-manipulation patterns: 2403 // (and (shl x, 1), (0x55555555 << 1)) 2404 // (and (srl x, 1), 0x55555555) 2405 // (shl (and x, 0x55555555), 1) 2406 // (srl (and x, (0x55555555 << 1)), 1) 2407 // where the shift amount and mask may vary thus: 2408 // [1] = 0x55555555 / 0xAAAAAAAA 2409 // [2] = 0x33333333 / 0xCCCCCCCC 2410 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 2411 // [8] = 0x00FF00FF / 0xFF00FF00 2412 // [16] = 0x0000FFFF / 0xFFFF0000 2413 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 2414 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) { 2415 Optional<uint64_t> Mask; 2416 // Optionally consume a mask around the shift operation. 2417 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 2418 Mask = Op.getConstantOperandVal(1); 2419 Op = Op.getOperand(0); 2420 } 2421 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 2422 return None; 2423 bool IsSHL = Op.getOpcode() == ISD::SHL; 2424 2425 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2426 return None; 2427 auto ShAmt = Op.getConstantOperandVal(1); 2428 2429 if (!isPowerOf2_64(ShAmt)) 2430 return None; 2431 2432 // These are the unshifted masks which we use to match bit-manipulation 2433 // patterns. They may be shifted left in certain circumstances. 2434 static const uint64_t BitmanipMasks[] = { 2435 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 2436 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL, 2437 }; 2438 2439 unsigned MaskIdx = Log2_64(ShAmt); 2440 if (MaskIdx >= array_lengthof(BitmanipMasks)) 2441 return None; 2442 2443 auto Src = Op.getOperand(0); 2444 2445 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 2446 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 2447 2448 // The expected mask is shifted left when the AND is found around SHL 2449 // patterns. 2450 // ((x >> 1) & 0x55555555) 2451 // ((x << 1) & 0xAAAAAAAA) 2452 bool SHLExpMask = IsSHL; 2453 2454 if (!Mask) { 2455 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 2456 // the mask is all ones: consume that now. 2457 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 2458 Mask = Src.getConstantOperandVal(1); 2459 Src = Src.getOperand(0); 2460 // The expected mask is now in fact shifted left for SRL, so reverse the 2461 // decision. 2462 // ((x & 0xAAAAAAAA) >> 1) 2463 // ((x & 0x55555555) << 1) 2464 SHLExpMask = !SHLExpMask; 2465 } else { 2466 // Use a default shifted mask of all-ones if there's no AND, truncated 2467 // down to the expected width. This simplifies the logic later on. 2468 Mask = maskTrailingOnes<uint64_t>(Width); 2469 *Mask &= (IsSHL ?
*Mask << ShAmt : *Mask >> ShAmt); 2470 } 2471 } 2472 2473 if (SHLExpMask) 2474 ExpMask <<= ShAmt; 2475 2476 if (Mask != ExpMask) 2477 return None; 2478 2479 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 2480 } 2481 2482 // Match the following pattern as a GREVI(W) operation: 2483 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 2484 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 2485 const RISCVSubtarget &Subtarget) { 2486 EVT VT = Op.getValueType(); 2487 2488 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 2489 auto LHS = matchRISCVBitmanipPat(Op.getOperand(0)); 2490 auto RHS = matchRISCVBitmanipPat(Op.getOperand(1)); 2491 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 2492 SDLoc DL(Op); 2493 return DAG.getNode( 2494 RISCVISD::GREVI, DL, VT, LHS->Op, 2495 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2496 } 2497 } 2498 return SDValue(); 2499 } 2500 2501 // Matches any of the following patterns as a GORCI(W) operation: 2502 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 2503 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 2504 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 2505 // Note that with the following variant of 3., 2506 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 2507 // the inner pattern will first be matched as GREVI and then the outer 2508 // pattern will be matched to GORC via the first rule above. 2509 // 4. (or (rotl/rotr x, bitwidth/2), x) 2510 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 2511 const RISCVSubtarget &Subtarget) { 2512 EVT VT = Op.getValueType(); 2513 2514 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 2515 SDLoc DL(Op); 2516 SDValue Op0 = Op.getOperand(0); 2517 SDValue Op1 = Op.getOperand(1); 2518 2519 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 2520 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 2521 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 2522 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 2523 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
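// For example, on RV32 (rotl x, 16) == (x << 16) | (x >> 16), so
// (or (rotl x, 16), x) == x | (x << 16) | (x >> 16) == (gorci x, 16).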
2524 if ((Reverse.getOpcode() == ISD::ROTL || 2525 Reverse.getOpcode() == ISD::ROTR) && 2526 Reverse.getOperand(0) == X && 2527 isa<ConstantSDNode>(Reverse.getOperand(1))) { 2528 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 2529 if (RotAmt == (VT.getSizeInBits() / 2)) 2530 return DAG.getNode( 2531 RISCVISD::GORCI, DL, VT, X, 2532 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2533 } 2534 return SDValue(); 2535 }; 2536 2537 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2538 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2539 return V; 2540 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2541 return V; 2542 2543 // OR is commutable so canonicalize its OR operand to the left 2544 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2545 std::swap(Op0, Op1); 2546 if (Op0.getOpcode() != ISD::OR) 2547 return SDValue(); 2548 SDValue OrOp0 = Op0.getOperand(0); 2549 SDValue OrOp1 = Op0.getOperand(1); 2550 auto LHS = matchRISCVBitmanipPat(OrOp0); 2551 // OR is commutable so swap the operands and try again: x might have been 2552 // on the left 2553 if (!LHS) { 2554 std::swap(OrOp0, OrOp1); 2555 LHS = matchRISCVBitmanipPat(OrOp0); 2556 } 2557 auto RHS = matchRISCVBitmanipPat(Op1); 2558 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2559 return DAG.getNode( 2560 RISCVISD::GORCI, DL, VT, LHS->Op, 2561 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2562 } 2563 } 2564 return SDValue(); 2565 } 2566 2567 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2568 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2569 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2570 // not undo itself, but they are redundant. 2571 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2572 unsigned ShAmt1 = N->getConstantOperandVal(1); 2573 SDValue Src = N->getOperand(0); 2574 2575 if (Src.getOpcode() != N->getOpcode()) 2576 return SDValue(); 2577 2578 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2579 Src = Src.getOperand(0); 2580 2581 unsigned CombinedShAmt; 2582 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2583 CombinedShAmt = ShAmt1 | ShAmt2; 2584 else 2585 CombinedShAmt = ShAmt1 ^ ShAmt2; 2586 2587 if (CombinedShAmt == 0) 2588 return Src; 2589 2590 SDLoc DL(N); 2591 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2592 DAG.getTargetConstant(CombinedShAmt, DL, 2593 N->getOperand(1).getValueType())); 2594 } 2595 2596 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2597 DAGCombinerInfo &DCI) const { 2598 SelectionDAG &DAG = DCI.DAG; 2599 2600 switch (N->getOpcode()) { 2601 default: 2602 break; 2603 case RISCVISD::SplitF64: { 2604 SDValue Op0 = N->getOperand(0); 2605 // If the input to SplitF64 is just BuildPairF64 then the operation is 2606 // redundant. Instead, use BuildPairF64's operands directly. 2607 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2608 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2609 2610 SDLoc DL(N); 2611 2612 // It's cheaper to materialise two 32-bit integers than to load a double 2613 // from the constant pool and transfer it to integer registers through the 2614 // stack. 
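// For example, the f64 constant 1.0 (0x3FF0000000000000) splits into
// Lo = 0x00000000 and Hi = 0x3FF00000, each materialisable in at most one
// instruction (x0 and a single lui).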
2615 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 2616 APInt V = C->getValueAPF().bitcastToAPInt(); 2617 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 2618 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 2619 return DCI.CombineTo(N, Lo, Hi); 2620 } 2621 2622 // This is a target-specific version of a DAGCombine performed in 2623 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2624 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2625 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2626 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2627 !Op0.getNode()->hasOneUse()) 2628 break; 2629 SDValue NewSplitF64 = 2630 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 2631 Op0.getOperand(0)); 2632 SDValue Lo = NewSplitF64.getValue(0); 2633 SDValue Hi = NewSplitF64.getValue(1); 2634 APInt SignBit = APInt::getSignMask(32); 2635 if (Op0.getOpcode() == ISD::FNEG) { 2636 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 2637 DAG.getConstant(SignBit, DL, MVT::i32)); 2638 return DCI.CombineTo(N, Lo, NewHi); 2639 } 2640 assert(Op0.getOpcode() == ISD::FABS); 2641 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 2642 DAG.getConstant(~SignBit, DL, MVT::i32)); 2643 return DCI.CombineTo(N, Lo, NewHi); 2644 } 2645 case RISCVISD::SLLW: 2646 case RISCVISD::SRAW: 2647 case RISCVISD::SRLW: 2648 case RISCVISD::ROLW: 2649 case RISCVISD::RORW: { 2650 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 2651 SDValue LHS = N->getOperand(0); 2652 SDValue RHS = N->getOperand(1); 2653 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 2654 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 2655 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 2656 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 2657 if (N->getOpcode() != ISD::DELETED_NODE) 2658 DCI.AddToWorklist(N); 2659 return SDValue(N, 0); 2660 } 2661 break; 2662 } 2663 case RISCVISD::FSL: 2664 case RISCVISD::FSR: { 2665 // Only the lower log2(Bitwidth)+1 bits of the shift amount are read. 2666 SDValue ShAmt = N->getOperand(2); 2667 unsigned BitWidth = ShAmt.getValueSizeInBits(); 2668 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 2669 APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1); 2670 if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2671 if (N->getOpcode() != ISD::DELETED_NODE) 2672 DCI.AddToWorklist(N); 2673 return SDValue(N, 0); 2674 } 2675 break; 2676 } 2677 case RISCVISD::FSLW: 2678 case RISCVISD::FSRW: { 2679 // Only the lower 32 bits of the values and the lower 6 bits of the shift 2680 // amount are read.
2681 SDValue Op0 = N->getOperand(0); 2682 SDValue Op1 = N->getOperand(1); 2683 SDValue ShAmt = N->getOperand(2); 2684 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2685 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2686 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2687 SimplifyDemandedBits(Op1, OpMask, DCI) || 2688 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2689 if (N->getOpcode() != ISD::DELETED_NODE) 2690 DCI.AddToWorklist(N); 2691 return SDValue(N, 0); 2692 } 2693 break; 2694 } 2695 case RISCVISD::GREVIW: 2696 case RISCVISD::GORCIW: { 2697 // Only the lower 32 bits of the first operand are read 2698 SDValue Op0 = N->getOperand(0); 2699 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2700 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2701 if (N->getOpcode() != ISD::DELETED_NODE) 2702 DCI.AddToWorklist(N); 2703 return SDValue(N, 0); 2704 } 2705 2706 return combineGREVI_GORCI(N, DCI.DAG); 2707 } 2708 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2709 SDLoc DL(N); 2710 SDValue Op0 = N->getOperand(0); 2711 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2712 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2713 // of the FMV_W_X_RV64 operand. 2714 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2715 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2716 "Unexpected value type!"); 2717 return Op0.getOperand(0); 2718 } 2719 2720 // This is a target-specific version of a DAGCombine performed in 2721 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2722 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2723 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2724 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2725 !Op0.getNode()->hasOneUse()) 2726 break; 2727 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2728 Op0.getOperand(0)); 2729 APInt SignBit = APInt::getSignMask(32).sext(64); 2730 if (Op0.getOpcode() == ISD::FNEG) 2731 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2732 DAG.getConstant(SignBit, DL, MVT::i64)); 2733 2734 assert(Op0.getOpcode() == ISD::FABS); 2735 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2736 DAG.getConstant(~SignBit, DL, MVT::i64)); 2737 } 2738 case RISCVISD::GREVI: 2739 case RISCVISD::GORCI: 2740 return combineGREVI_GORCI(N, DCI.DAG); 2741 case ISD::OR: 2742 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2743 return GREV; 2744 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2745 return GORC; 2746 break; 2747 case RISCVISD::SELECT_CC: { 2748 // Transform 2749 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2750 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2751 // This can occur when legalizing some floating point comparisons. 
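// For instance, (setcc f64 a, b, une) may be legalized on hard-float targets
// as (xor (setcc a, b, oeq), 1), which then reaches this SELECT_CC in exactly
// the (xor X, 1) form matched here.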
2752 SDValue LHS = N->getOperand(0); 2753 SDValue RHS = N->getOperand(1); 2754 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2755 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2756 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2757 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2758 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2759 SDLoc DL(N); 2760 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2761 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2762 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2763 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2764 N->getOperand(4)}); 2765 } 2766 break; 2767 } 2768 case ISD::SETCC: { 2769 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2770 // Comparing with 0 may allow us to fold into bnez/beqz. 2771 SDValue LHS = N->getOperand(0); 2772 SDValue RHS = N->getOperand(1); 2773 if (LHS.getValueType().isScalableVector()) 2774 break; 2775 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2776 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2777 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2778 DAG.MaskedValueIsZero(LHS, Mask)) { 2779 SDLoc DL(N); 2780 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2781 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2782 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2783 } 2784 break; 2785 } 2786 } 2787 2788 return SDValue(); 2789 } 2790 2791 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2792 const SDNode *N, CombineLevel Level) const { 2793 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2794 // materialised in fewer instructions than `(OP _, c1)`: 2795 // 2796 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2797 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2798 SDValue N0 = N->getOperand(0); 2799 EVT Ty = N0.getValueType(); 2800 if (Ty.isScalarInteger() && 2801 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2802 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2803 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2804 if (C1 && C2) { 2805 const APInt &C1Int = C1->getAPIntValue(); 2806 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2807 2808 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2809 // and the combine should happen, to potentially allow further combines 2810 // later. 2811 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2812 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2813 return true; 2814 2815 // We can materialise `c1` in an add immediate, so it's "free", and the 2816 // combine should be prevented. 2817 if (C1Int.getMinSignedBits() <= 64 && 2818 isLegalAddImmediate(C1Int.getSExtValue())) 2819 return false; 2820 2821 // Neither constant will fit into an immediate, so find materialisation 2822 // costs. 2823 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2824 Subtarget.is64Bit()); 2825 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2826 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2827 2828 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2829 // combine should be prevented. 
2830 if (C1Cost < ShiftedC1Cost) 2831 return false; 2832 } 2833 } 2834 return true; 2835 } 2836 2837 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2838 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2839 TargetLoweringOpt &TLO) const { 2840 // Delay this optimization as late as possible. 2841 if (!TLO.LegalOps) 2842 return false; 2843 2844 EVT VT = Op.getValueType(); 2845 if (VT.isVector()) 2846 return false; 2847 2848 // Only handle AND for now. 2849 if (Op.getOpcode() != ISD::AND) 2850 return false; 2851 2852 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2853 if (!C) 2854 return false; 2855 2856 const APInt &Mask = C->getAPIntValue(); 2857 2858 // Clear all non-demanded bits initially. 2859 APInt ShrunkMask = Mask & DemandedBits; 2860 2861 // If the shrunk mask fits in sign extended 12 bits, let the target 2862 // independent code apply it. 2863 if (ShrunkMask.isSignedIntN(12)) 2864 return false; 2865 2866 // Try to make a smaller immediate by setting undemanded bits. 2867 2868 // We need to be able to make a negative number through a combination of mask 2869 // and undemanded bits. 2870 APInt ExpandedMask = Mask | ~DemandedBits; 2871 if (!ExpandedMask.isNegative()) 2872 return false; 2873 2874 // What is the fewest number of bits we need to represent the negative number. 2875 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2876 2877 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2878 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2879 APInt NewMask = ShrunkMask; 2880 if (MinSignedBits <= 12) 2881 NewMask.setBitsFrom(11); 2882 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2883 NewMask.setBitsFrom(31); 2884 else 2885 return false; 2886 2887 // Sanity check that our new mask is a subset of the demanded mask. 2888 assert(NewMask.isSubsetOf(ExpandedMask)); 2889 2890 // If we aren't changing the mask, just return true to keep it and prevent 2891 // the caller from optimizing. 2892 if (NewMask == Mask) 2893 return true; 2894 2895 // Replace the constant with the new mask. 2896 SDLoc DL(Op); 2897 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2898 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2899 return TLO.CombineTo(Op, NewOp); 2900 } 2901 2902 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2903 KnownBits &Known, 2904 const APInt &DemandedElts, 2905 const SelectionDAG &DAG, 2906 unsigned Depth) const { 2907 unsigned BitWidth = Known.getBitWidth(); 2908 unsigned Opc = Op.getOpcode(); 2909 assert((Opc >= ISD::BUILTIN_OP_END || 2910 Opc == ISD::INTRINSIC_WO_CHAIN || 2911 Opc == ISD::INTRINSIC_W_CHAIN || 2912 Opc == ISD::INTRINSIC_VOID) && 2913 "Should use MaskedValueIsZero if you don't know whether Op" 2914 " is a target node!"); 2915 2916 Known.resetAll(); 2917 switch (Opc) { 2918 default: break; 2919 case RISCVISD::REMUW: { 2920 KnownBits Known2; 2921 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2922 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2923 // We only care about the lower 32 bits. 2924 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2925 // Restore the original width by sign extending. 
2926 Known = Known.sext(BitWidth); 2927 break; 2928 } 2929 case RISCVISD::DIVUW: { 2930 KnownBits Known2; 2931 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2932 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2933 // We only care about the lower 32 bits. 2934 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2935 // Restore the original width by sign extending. 2936 Known = Known.sext(BitWidth); 2937 break; 2938 } 2939 case RISCVISD::READ_VLENB: 2940 // We assume VLENB is at least 8 bytes. 2941 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2942 Known.Zero.setLowBits(3); 2943 break; 2944 } 2945 } 2946 2947 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2948 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2949 unsigned Depth) const { 2950 switch (Op.getOpcode()) { 2951 default: 2952 break; 2953 case RISCVISD::SLLW: 2954 case RISCVISD::SRAW: 2955 case RISCVISD::SRLW: 2956 case RISCVISD::DIVW: 2957 case RISCVISD::DIVUW: 2958 case RISCVISD::REMUW: 2959 case RISCVISD::ROLW: 2960 case RISCVISD::RORW: 2961 case RISCVISD::GREVIW: 2962 case RISCVISD::GORCIW: 2963 case RISCVISD::FSLW: 2964 case RISCVISD::FSRW: 2965 // TODO: As the result is sign-extended, this is conservatively correct. A 2966 // more precise answer could be calculated for SRAW depending on known 2967 // bits in the shift amount. 2968 return 33; 2969 case RISCVISD::VMV_X_S: 2970 // The number of sign bits of the scalar result is computed by obtaining the 2971 // element type of the input vector operand, subtracting its width from the 2972 // XLEN, and then adding one (sign bit within the element type). If the 2973 // element type is wider than XLen, the least-significant XLEN bits are 2974 // taken. 2975 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 2976 return 1; 2977 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 2978 } 2979 2980 return 1; 2981 } 2982 2983 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 2984 MachineBasicBlock *BB) { 2985 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 2986 2987 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 2988 // Should the count have wrapped while it was being read, we need to try 2989 // again. 2990 // ... 2991 // read: 2992 // rdcycleh x3 # load high word of cycle 2993 // rdcycle x2 # load low word of cycle 2994 // rdcycleh x4 # load high word of cycle 2995 // bne x3, x4, read # check if high word reads match, otherwise try again 2996 // ... 2997 2998 MachineFunction &MF = *BB->getParent(); 2999 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3000 MachineFunction::iterator It = ++BB->getIterator(); 3001 3002 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3003 MF.insert(It, LoopMBB); 3004 3005 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 3006 MF.insert(It, DoneMBB); 3007 3008 // Transfer the remainder of BB and its successor edges to DoneMBB. 
3009 DoneMBB->splice(DoneMBB->begin(), BB, 3010 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 3011 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 3012 3013 BB->addSuccessor(LoopMBB); 3014 3015 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3016 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 3017 Register LoReg = MI.getOperand(0).getReg(); 3018 Register HiReg = MI.getOperand(1).getReg(); 3019 DebugLoc DL = MI.getDebugLoc(); 3020 3021 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 3022 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 3023 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3024 .addReg(RISCV::X0); 3025 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 3026 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 3027 .addReg(RISCV::X0); 3028 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 3029 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 3030 .addReg(RISCV::X0); 3031 3032 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 3033 .addReg(HiReg) 3034 .addReg(ReadAgainReg) 3035 .addMBB(LoopMBB); 3036 3037 LoopMBB->addSuccessor(LoopMBB); 3038 LoopMBB->addSuccessor(DoneMBB); 3039 3040 MI.eraseFromParent(); 3041 3042 return DoneMBB; 3043 } 3044 3045 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 3046 MachineBasicBlock *BB) { 3047 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 3048 3049 MachineFunction &MF = *BB->getParent(); 3050 DebugLoc DL = MI.getDebugLoc(); 3051 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3052 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3053 Register LoReg = MI.getOperand(0).getReg(); 3054 Register HiReg = MI.getOperand(1).getReg(); 3055 Register SrcReg = MI.getOperand(2).getReg(); 3056 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 3057 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3058 3059 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 3060 RI); 3061 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3062 MachineMemOperand *MMOLo = 3063 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 3064 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3065 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 3066 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 3067 .addFrameIndex(FI) 3068 .addImm(0) 3069 .addMemOperand(MMOLo); 3070 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 3071 .addFrameIndex(FI) 3072 .addImm(4) 3073 .addMemOperand(MMOHi); 3074 MI.eraseFromParent(); // The pseudo instruction is gone now. 
3075 return BB; 3076 } 3077 3078 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 3079 MachineBasicBlock *BB) { 3080 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 3081 "Unexpected instruction"); 3082 3083 MachineFunction &MF = *BB->getParent(); 3084 DebugLoc DL = MI.getDebugLoc(); 3085 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3086 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 3087 Register DstReg = MI.getOperand(0).getReg(); 3088 Register LoReg = MI.getOperand(1).getReg(); 3089 Register HiReg = MI.getOperand(2).getReg(); 3090 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 3091 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 3092 3093 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 3094 MachineMemOperand *MMOLo = 3095 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 3096 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 3097 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 3098 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3099 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 3100 .addFrameIndex(FI) 3101 .addImm(0) 3102 .addMemOperand(MMOLo); 3103 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 3104 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 3105 .addFrameIndex(FI) 3106 .addImm(4) 3107 .addMemOperand(MMOHi); 3108 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 3109 MI.eraseFromParent(); // The pseudo instruction is gone now. 3110 return BB; 3111 } 3112 3113 static bool isSelectPseudo(MachineInstr &MI) { 3114 switch (MI.getOpcode()) { 3115 default: 3116 return false; 3117 case RISCV::Select_GPR_Using_CC_GPR: 3118 case RISCV::Select_FPR16_Using_CC_GPR: 3119 case RISCV::Select_FPR32_Using_CC_GPR: 3120 case RISCV::Select_FPR64_Using_CC_GPR: 3121 return true; 3122 } 3123 } 3124 3125 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 3126 MachineBasicBlock *BB) { 3127 // To "insert" Select_* instructions, we actually have to insert the triangle 3128 // control-flow pattern. The incoming instructions know the destination vreg 3129 // to set, the condition code register to branch on, the true/false values to 3130 // select between, and the condcode to use to select the appropriate branch. 3131 // 3132 // We produce the following control flow: 3133 // HeadMBB 3134 // | \ 3135 // | IfFalseMBB 3136 // | / 3137 // TailMBB 3138 // 3139 // When we find a sequence of selects we attempt to optimize their emission 3140 // by sharing the control flow. Currently we only handle cases where we have 3141 // multiple selects with the exact same condition (same LHS, RHS and CC). 3142 // The selects may be interleaved with other instructions if the other 3143 // instructions meet some requirements we deem safe: 3144 // - They are debug instructions. Otherwise, 3145 // - They do not have side-effects, do not access memory and their inputs do 3146 // not depend on the results of the select pseudo-instructions. 3147 // The TrueV/FalseV operands of the selects cannot depend on the result of 3148 // previous selects in the sequence. 3149 // These conditions could be further relaxed. See the X86 target for a 3150 // related approach and more information. 
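  // Illustrative sketch (added for exposition; the virtual register names
  // below are invented examples, not values produced by this code). Two
  // selects that share the same LHS, RHS and CC, such as:
  //   %x = Select_GPR_Using_CC_GPR %lhs, %rhs, CC, %a, %b
  //   %y = Select_GPR_Using_CC_GPR %lhs, %rhs, CC, %c, %d
  // are emitted as a single conditional branch in HeadMBB plus one PHI per
  // select in TailMBB:
  //   %x = PHI [ %a, HeadMBB ], [ %b, IfFalseMBB ]
  //   %y = PHI [ %c, HeadMBB ], [ %d, IfFalseMBB ]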
3151 Register LHS = MI.getOperand(1).getReg(); 3152 Register RHS = MI.getOperand(2).getReg(); 3153 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 3154 3155 SmallVector<MachineInstr *, 4> SelectDebugValues; 3156 SmallSet<Register, 4> SelectDests; 3157 SelectDests.insert(MI.getOperand(0).getReg()); 3158 3159 MachineInstr *LastSelectPseudo = &MI; 3160 3161 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 3162 SequenceMBBI != E; ++SequenceMBBI) { 3163 if (SequenceMBBI->isDebugInstr()) 3164 continue; 3165 else if (isSelectPseudo(*SequenceMBBI)) { 3166 if (SequenceMBBI->getOperand(1).getReg() != LHS || 3167 SequenceMBBI->getOperand(2).getReg() != RHS || 3168 SequenceMBBI->getOperand(3).getImm() != CC || 3169 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 3170 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 3171 break; 3172 LastSelectPseudo = &*SequenceMBBI; 3173 SequenceMBBI->collectDebugValues(SelectDebugValues); 3174 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 3175 } else { 3176 if (SequenceMBBI->hasUnmodeledSideEffects() || 3177 SequenceMBBI->mayLoadOrStore()) 3178 break; 3179 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 3180 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 3181 })) 3182 break; 3183 } 3184 } 3185 3186 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 3187 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3188 DebugLoc DL = MI.getDebugLoc(); 3189 MachineFunction::iterator I = ++BB->getIterator(); 3190 3191 MachineBasicBlock *HeadMBB = BB; 3192 MachineFunction *F = BB->getParent(); 3193 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 3194 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 3195 3196 F->insert(I, IfFalseMBB); 3197 F->insert(I, TailMBB); 3198 3199 // Transfer debug instructions associated with the selects to TailMBB. 3200 for (MachineInstr *DebugInstr : SelectDebugValues) { 3201 TailMBB->push_back(DebugInstr->removeFromParent()); 3202 } 3203 3204 // Move all instructions after the sequence to TailMBB. 3205 TailMBB->splice(TailMBB->end(), HeadMBB, 3206 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 3207 // Update machine-CFG edges by transferring all successors of the current 3208 // block to the new block which will contain the Phi nodes for the selects. 3209 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 3210 // Set the successors for HeadMBB. 3211 HeadMBB->addSuccessor(IfFalseMBB); 3212 HeadMBB->addSuccessor(TailMBB); 3213 3214 // Insert appropriate branch. 3215 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 3216 3217 BuildMI(HeadMBB, DL, TII.get(Opcode)) 3218 .addReg(LHS) 3219 .addReg(RHS) 3220 .addMBB(TailMBB); 3221 3222 // IfFalseMBB just falls through to TailMBB. 3223 IfFalseMBB->addSuccessor(TailMBB); 3224 3225 // Create PHIs for all of the select pseudo-instructions. 
3226 auto SelectMBBI = MI.getIterator(); 3227 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 3228 auto InsertionPoint = TailMBB->begin(); 3229 while (SelectMBBI != SelectEnd) { 3230 auto Next = std::next(SelectMBBI); 3231 if (isSelectPseudo(*SelectMBBI)) { 3232 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 3233 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 3234 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 3235 .addReg(SelectMBBI->getOperand(4).getReg()) 3236 .addMBB(HeadMBB) 3237 .addReg(SelectMBBI->getOperand(5).getReg()) 3238 .addMBB(IfFalseMBB); 3239 SelectMBBI->eraseFromParent(); 3240 } 3241 SelectMBBI = Next; 3242 } 3243 3244 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 3245 return TailMBB; 3246 } 3247 3248 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 3249 int VLIndex, unsigned SEWIndex, 3250 RISCVVLMUL VLMul, bool ForceTailAgnostic) { 3251 MachineFunction &MF = *BB->getParent(); 3252 DebugLoc DL = MI.getDebugLoc(); 3253 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 3254 3255 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 3256 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3257 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 3258 3259 MachineRegisterInfo &MRI = MF.getRegInfo(); 3260 3261 // VL and VTYPE are alive here. 3262 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 3263 3264 if (VLIndex >= 0) { 3265 // Set VL (rs1 != X0). 3266 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3267 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 3268 .addReg(MI.getOperand(VLIndex).getReg()); 3269 } else 3270 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 3271 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 3272 .addReg(RISCV::X0, RegState::Kill); 3273 3274 // Default to tail agnostic unless the destination is tied to a source. In 3275 // that case the user would have some control over the tail values. The tail 3276 // policy is also ignored on instructions that only update element 0 like 3277 // vmv.s.x or reductions so use agnostic there to match the common case. 3278 // FIXME: This is conservatively correct, but we might want to detect that 3279 // the input is undefined. 3280 bool TailAgnostic = true; 3281 unsigned UseOpIdx; 3282 if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 3283 TailAgnostic = false; 3284 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 3285 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 3286 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 3287 if (UseMI && UseMI->isImplicitDef()) 3288 TailAgnostic = true; 3289 } 3290 3291 // For simplicity we reuse the vtype representation here. 
3292   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
3293                                      /*TailAgnostic*/ TailAgnostic,
3294                                      /*MaskAgnostic*/ false));
3295
3296   // Remove (now) redundant operands from the pseudo.
3297   MI.getOperand(SEWIndex).setImm(-1);
3298   if (VLIndex >= 0) {
3299     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
3300     MI.getOperand(VLIndex).setIsKill(false);
3301   }
3302
3303   return BB;
3304 }
3305
3306 MachineBasicBlock *
3307 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3308                                                  MachineBasicBlock *BB) const {
3309   uint64_t TSFlags = MI.getDesc().TSFlags;
3310
3311   if (TSFlags & RISCVII::HasSEWOpMask) {
3312     unsigned NumOperands = MI.getNumExplicitOperands();
3313     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
3314     unsigned SEWIndex = NumOperands - 1;
3315     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
3316
3317     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
3318                                                RISCVII::VLMulShift);
3319     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
3320   }
3321
3322   switch (MI.getOpcode()) {
3323   default:
3324     llvm_unreachable("Unexpected instr type to insert");
3325   case RISCV::ReadCycleWide:
3326     assert(!Subtarget.is64Bit() &&
3327            "ReadCycleWide is only to be used on riscv32");
3328     return emitReadCycleWidePseudo(MI, BB);
3329   case RISCV::Select_GPR_Using_CC_GPR:
3330   case RISCV::Select_FPR16_Using_CC_GPR:
3331   case RISCV::Select_FPR32_Using_CC_GPR:
3332   case RISCV::Select_FPR64_Using_CC_GPR:
3333     return emitSelectPseudo(MI, BB);
3334   case RISCV::BuildPairF64Pseudo:
3335     return emitBuildPairF64Pseudo(MI, BB);
3336   case RISCV::SplitF64Pseudo:
3337     return emitSplitF64Pseudo(MI, BB);
3338   }
3339 }
3340
3341 // Calling Convention Implementation.
3342 // The expectations for frontend ABI lowering vary from target to target.
3343 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
3344 // details, but this is a longer-term goal. For now, we simply try to keep the
3345 // role of the frontend as simple and well-defined as possible. The rules can
3346 // be summarised as:
3347 // * Never split up large scalar arguments. We handle them here.
3348 // * If a hard-float calling convention is being used, and the struct may be
3349 //   passed in a pair of registers (fp+fp, int+fp), and both registers are
3350 //   available, then pass as two separate arguments. If either the GPRs or FPRs
3351 //   are exhausted, then pass according to the rule below.
3352 // * If a struct could never be passed in registers or directly in a stack
3353 //   slot (as it is larger than 2*XLEN and the floating point rules don't
3354 //   apply), then pass it using a pointer with the byval attribute.
3355 // * If a struct is less than 2*XLEN, then coerce to either a two-element
3356 //   word-sized array or a 2*XLEN scalar (depending on alignment).
3357 // * The frontend can determine whether a struct is returned by reference or
3358 //   not based on its size and fields. If it will be returned by reference, the
3359 //   frontend must modify the prototype so a pointer with the sret annotation is
3360 //   passed as the first argument. This is not necessary for large scalar
3361 //   returns.
3362 // * Struct return values and varargs should be coerced to structs containing
3363 //   register-size fields in the same situations they would be for fixed
3364 //   arguments.
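// Illustrative examples of the rules above, assuming the ILP32D hard-float
// ABI (the concrete types are examples added for exposition, not an
// exhaustive list):
// * A struct such as { double; int32_t } may be passed as two separate
//   arguments (one FPR, one GPR) while registers of both classes remain
//   available; once either class is exhausted, the rules below take over.
// * A scalar wider than 2*XLEN (e.g. i128 on RV32) is not split by the
//   frontend; after legalisation it is passed indirectly, with the address of
//   its parts placed in a GPR or on the stack (see the CCValAssign::Indirect
//   handling in CC_RISCV below).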
3365 3366 static const MCPhysReg ArgGPRs[] = { 3367 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 3368 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 3369 }; 3370 static const MCPhysReg ArgFPR16s[] = { 3371 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 3372 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 3373 }; 3374 static const MCPhysReg ArgFPR32s[] = { 3375 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 3376 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 3377 }; 3378 static const MCPhysReg ArgFPR64s[] = { 3379 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 3380 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 3381 }; 3382 // This is an interim calling convention and it may be changed in the future. 3383 static const MCPhysReg ArgVRs[] = { 3384 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 3385 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 3386 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 3387 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 3388 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 3389 RISCV::V20M2, RISCV::V22M2}; 3390 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 3391 RISCV::V20M4}; 3392 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 3393 3394 // Pass a 2*XLEN argument that has been split into two XLEN values through 3395 // registers or the stack as necessary. 3396 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 3397 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 3398 MVT ValVT2, MVT LocVT2, 3399 ISD::ArgFlagsTy ArgFlags2) { 3400 unsigned XLenInBytes = XLen / 8; 3401 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3402 // At least one half can be passed via register. 3403 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 3404 VA1.getLocVT(), CCValAssign::Full)); 3405 } else { 3406 // Both halves must be passed on the stack, with proper alignment. 3407 Align StackAlign = 3408 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 3409 State.addLoc( 3410 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 3411 State.AllocateStack(XLenInBytes, StackAlign), 3412 VA1.getLocVT(), CCValAssign::Full)); 3413 State.addLoc(CCValAssign::getMem( 3414 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3415 LocVT2, CCValAssign::Full)); 3416 return false; 3417 } 3418 3419 if (Register Reg = State.AllocateReg(ArgGPRs)) { 3420 // The second half can also be passed via register. 3421 State.addLoc( 3422 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 3423 } else { 3424 // The second half is passed via the stack, without additional alignment. 3425 State.addLoc(CCValAssign::getMem( 3426 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 3427 LocVT2, CCValAssign::Full)); 3428 } 3429 3430 return false; 3431 } 3432 3433 // Implements the RISC-V calling convention. Returns true upon failure. 3434 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 3435 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 3436 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 3437 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 3438 Optional<unsigned> FirstMaskArgument) { 3439 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 3440 assert(XLen == 32 || XLen == 64); 3441 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 3442 3443 // Any return value split in to more than two values can't be returned 3444 // directly. 3445 if (IsRet && ValNo > 1) 3446 return true; 3447 3448 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 3449 // variadic argument, or if no F16/F32 argument registers are available. 3450 bool UseGPRForF16_F32 = true; 3451 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 3452 // variadic argument, or if no F64 argument registers are available. 3453 bool UseGPRForF64 = true; 3454 3455 switch (ABI) { 3456 default: 3457 llvm_unreachable("Unexpected ABI"); 3458 case RISCVABI::ABI_ILP32: 3459 case RISCVABI::ABI_LP64: 3460 break; 3461 case RISCVABI::ABI_ILP32F: 3462 case RISCVABI::ABI_LP64F: 3463 UseGPRForF16_F32 = !IsFixed; 3464 break; 3465 case RISCVABI::ABI_ILP32D: 3466 case RISCVABI::ABI_LP64D: 3467 UseGPRForF16_F32 = !IsFixed; 3468 UseGPRForF64 = !IsFixed; 3469 break; 3470 } 3471 3472 // FPR16, FPR32, and FPR64 alias each other. 3473 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 3474 UseGPRForF16_F32 = true; 3475 UseGPRForF64 = true; 3476 } 3477 3478 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 3479 // similar local variables rather than directly checking against the target 3480 // ABI. 3481 3482 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 3483 LocVT = XLenVT; 3484 LocInfo = CCValAssign::BCvt; 3485 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 3486 LocVT = MVT::i64; 3487 LocInfo = CCValAssign::BCvt; 3488 } 3489 3490 // If this is a variadic argument, the RISC-V calling convention requires 3491 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 3492 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 3493 // be used regardless of whether the original argument was split during 3494 // legalisation or not. The argument will not be passed by registers if the 3495 // original type is larger than 2*XLEN, so the register alignment rule does 3496 // not apply. 3497 unsigned TwoXLenInBytes = (2 * XLen) / 8; 3498 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 3499 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 3500 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 3501 // Skip 'odd' register if necessary. 3502 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 3503 State.AllocateReg(ArgGPRs); 3504 } 3505 3506 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 3507 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 3508 State.getPendingArgFlags(); 3509 3510 assert(PendingLocs.size() == PendingArgFlags.size() && 3511 "PendingLocs and PendingArgFlags out of sync"); 3512 3513 // Handle passing f64 on RV32D with a soft float ABI or when floating point 3514 // registers are exhausted. 3515 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 3516 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 3517 "Can't lower f64 if it is split"); 3518 // Depending on available argument GPRS, f64 may be passed in a pair of 3519 // GPRs, split between a GPR and the stack, or passed completely on the 3520 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 3521 // cases. 
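    // For illustration (the register choices below are examples added for
    // exposition, not requirements): if a2 and a3 are the next free argument
    // GPRs, the f64 travels in that register pair; if only a7 remains, the
    // low half goes in a7 and the high half in a 4-byte stack slot; if no
    // argument GPRs remain, the whole f64 is placed in an 8-byte aligned
    // stack slot.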
3522 Register Reg = State.AllocateReg(ArgGPRs); 3523 LocVT = MVT::i32; 3524 if (!Reg) { 3525 unsigned StackOffset = State.AllocateStack(8, Align(8)); 3526 State.addLoc( 3527 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3528 return false; 3529 } 3530 if (!State.AllocateReg(ArgGPRs)) 3531 State.AllocateStack(4, Align(4)); 3532 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3533 return false; 3534 } 3535 3536 // Split arguments might be passed indirectly, so keep track of the pending 3537 // values. 3538 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 3539 LocVT = XLenVT; 3540 LocInfo = CCValAssign::Indirect; 3541 PendingLocs.push_back( 3542 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 3543 PendingArgFlags.push_back(ArgFlags); 3544 if (!ArgFlags.isSplitEnd()) { 3545 return false; 3546 } 3547 } 3548 3549 // If the split argument only had two elements, it should be passed directly 3550 // in registers or on the stack. 3551 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3552 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3553 // Apply the normal calling convention rules to the first half of the 3554 // split argument. 3555 CCValAssign VA = PendingLocs[0]; 3556 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3557 PendingLocs.clear(); 3558 PendingArgFlags.clear(); 3559 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3560 ArgFlags); 3561 } 3562 3563 // Allocate to a register if possible, or else a stack slot. 3564 Register Reg; 3565 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3566 Reg = State.AllocateReg(ArgFPR16s); 3567 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3568 Reg = State.AllocateReg(ArgFPR32s); 3569 else if (ValVT == MVT::f64 && !UseGPRForF64) 3570 Reg = State.AllocateReg(ArgFPR64s); 3571 else if (ValVT.isScalableVector()) { 3572 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3573 if (RC == &RISCV::VRRegClass) { 3574 // Assign the first mask argument to V0. 3575 // This is an interim calling convention and it may be changed in the 3576 // future. 3577 if (FirstMaskArgument.hasValue() && 3578 ValNo == FirstMaskArgument.getValue()) { 3579 Reg = State.AllocateReg(RISCV::V0); 3580 } else { 3581 Reg = State.AllocateReg(ArgVRs); 3582 } 3583 } else if (RC == &RISCV::VRM2RegClass) { 3584 Reg = State.AllocateReg(ArgVRM2s); 3585 } else if (RC == &RISCV::VRM4RegClass) { 3586 Reg = State.AllocateReg(ArgVRM4s); 3587 } else if (RC == &RISCV::VRM8RegClass) { 3588 Reg = State.AllocateReg(ArgVRM8s); 3589 } else { 3590 llvm_unreachable("Unhandled class register for ValueType"); 3591 } 3592 if (!Reg) { 3593 LocInfo = CCValAssign::Indirect; 3594 // Try using a GPR to pass the address 3595 Reg = State.AllocateReg(ArgGPRs); 3596 LocVT = XLenVT; 3597 } 3598 } else 3599 Reg = State.AllocateReg(ArgGPRs); 3600 unsigned StackOffset = 3601 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3602 3603 // If we reach this point and PendingLocs is non-empty, we must be at the 3604 // end of a split argument that must be passed indirectly. 
3605 if (!PendingLocs.empty()) { 3606 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3607 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3608 3609 for (auto &It : PendingLocs) { 3610 if (Reg) 3611 It.convertToReg(Reg); 3612 else 3613 It.convertToMem(StackOffset); 3614 State.addLoc(It); 3615 } 3616 PendingLocs.clear(); 3617 PendingArgFlags.clear(); 3618 return false; 3619 } 3620 3621 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3622 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3623 "Expected an XLenVT or scalable vector types at this stage"); 3624 3625 if (Reg) { 3626 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3627 return false; 3628 } 3629 3630 // When a floating-point value is passed on the stack, no bit-conversion is 3631 // needed. 3632 if (ValVT.isFloatingPoint()) { 3633 LocVT = ValVT; 3634 LocInfo = CCValAssign::Full; 3635 } 3636 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3637 return false; 3638 } 3639 3640 template <typename ArgTy> 3641 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3642 for (const auto &ArgIdx : enumerate(Args)) { 3643 MVT ArgVT = ArgIdx.value().VT; 3644 if (ArgVT.isScalableVector() && 3645 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3646 return ArgIdx.index(); 3647 } 3648 return None; 3649 } 3650 3651 void RISCVTargetLowering::analyzeInputArgs( 3652 MachineFunction &MF, CCState &CCInfo, 3653 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3654 unsigned NumArgs = Ins.size(); 3655 FunctionType *FType = MF.getFunction().getFunctionType(); 3656 3657 Optional<unsigned> FirstMaskArgument; 3658 if (Subtarget.hasStdExtV()) 3659 FirstMaskArgument = preAssignMask(Ins); 3660 3661 for (unsigned i = 0; i != NumArgs; ++i) { 3662 MVT ArgVT = Ins[i].VT; 3663 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3664 3665 Type *ArgTy = nullptr; 3666 if (IsRet) 3667 ArgTy = FType->getReturnType(); 3668 else if (Ins[i].isOrigArg()) 3669 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3670 3671 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3672 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3673 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3674 FirstMaskArgument)) { 3675 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3676 << EVT(ArgVT).getEVTString() << '\n'); 3677 llvm_unreachable(nullptr); 3678 } 3679 } 3680 } 3681 3682 void RISCVTargetLowering::analyzeOutputArgs( 3683 MachineFunction &MF, CCState &CCInfo, 3684 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3685 CallLoweringInfo *CLI) const { 3686 unsigned NumArgs = Outs.size(); 3687 3688 Optional<unsigned> FirstMaskArgument; 3689 if (Subtarget.hasStdExtV()) 3690 FirstMaskArgument = preAssignMask(Outs); 3691 3692 for (unsigned i = 0; i != NumArgs; i++) { 3693 MVT ArgVT = Outs[i].VT; 3694 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3695 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3696 3697 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3698 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3699 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3700 FirstMaskArgument)) { 3701 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3702 << EVT(ArgVT).getEVTString() << "\n"); 3703 llvm_unreachable(nullptr); 3704 } 3705 } 3706 } 3707 3708 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3709 // values. 3710 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3711 const CCValAssign &VA, const SDLoc &DL) { 3712 switch (VA.getLocInfo()) { 3713 default: 3714 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3715 case CCValAssign::Full: 3716 break; 3717 case CCValAssign::BCvt: 3718 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3719 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3720 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3721 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3722 else 3723 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3724 break; 3725 } 3726 return Val; 3727 } 3728 3729 // The caller is responsible for loading the full value if the argument is 3730 // passed with CCValAssign::Indirect. 3731 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3732 const CCValAssign &VA, const SDLoc &DL, 3733 const RISCVTargetLowering &TLI) { 3734 MachineFunction &MF = DAG.getMachineFunction(); 3735 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3736 EVT LocVT = VA.getLocVT(); 3737 SDValue Val; 3738 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3739 Register VReg = RegInfo.createVirtualRegister(RC); 3740 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3741 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3742 3743 if (VA.getLocInfo() == CCValAssign::Indirect) 3744 return Val; 3745 3746 return convertLocVTToValVT(DAG, Val, VA, DL); 3747 } 3748 3749 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3750 const CCValAssign &VA, const SDLoc &DL) { 3751 EVT LocVT = VA.getLocVT(); 3752 3753 switch (VA.getLocInfo()) { 3754 default: 3755 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3756 case CCValAssign::Full: 3757 break; 3758 case CCValAssign::BCvt: 3759 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3760 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3761 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3762 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3763 else 3764 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3765 break; 3766 } 3767 return Val; 3768 } 3769 3770 // The caller is responsible for loading the full value if the argument is 3771 // passed with CCValAssign::Indirect. 
3772 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3773                                 const CCValAssign &VA, const SDLoc &DL) {
3774   MachineFunction &MF = DAG.getMachineFunction();
3775   MachineFrameInfo &MFI = MF.getFrameInfo();
3776   EVT LocVT = VA.getLocVT();
3777   EVT ValVT = VA.getValVT();
3778   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3779   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3780                                  VA.getLocMemOffset(), /*Immutable=*/true);
3781   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3782   SDValue Val;
3783
3784   ISD::LoadExtType ExtType;
3785   switch (VA.getLocInfo()) {
3786   default:
3787     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3788   case CCValAssign::Full:
3789   case CCValAssign::Indirect:
3790   case CCValAssign::BCvt:
3791     ExtType = ISD::NON_EXTLOAD;
3792     break;
3793   }
3794   Val = DAG.getExtLoad(
3795       ExtType, DL, LocVT, Chain, FIN,
3796       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3797   return Val;
3798 }
3799
3800 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3801                                        const CCValAssign &VA, const SDLoc &DL) {
3802   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3803          "Unexpected VA");
3804   MachineFunction &MF = DAG.getMachineFunction();
3805   MachineFrameInfo &MFI = MF.getFrameInfo();
3806   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3807
3808   if (VA.isMemLoc()) {
3809     // f64 is passed on the stack.
3810     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3811     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3812     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3813                        MachinePointerInfo::getFixedStack(MF, FI));
3814   }
3815
3816   assert(VA.isRegLoc() && "Expected register VA assignment");
3817
3818   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3819   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3820   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3821   SDValue Hi;
3822   if (VA.getLocReg() == RISCV::X17) {
3823     // Second half of f64 is passed on the stack.
3824     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3825     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3826     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3827                      MachinePointerInfo::getFixedStack(MF, FI));
3828   } else {
3829     // Second half of f64 is passed in another GPR.
3830     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3831     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3832     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3833   }
3834   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3835 }
3836
3837 // FastCC shows less than a 1% performance improvement on some particular
3838 // benchmarks, but theoretically it may be beneficial in some cases.
3839 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3840                             CCValAssign::LocInfo LocInfo,
3841                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3842
3843   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3844     // X5 and X6 might be used for the save-restore libcalls.
3845 static const MCPhysReg GPRList[] = { 3846 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3847 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3848 RISCV::X29, RISCV::X30, RISCV::X31}; 3849 if (unsigned Reg = State.AllocateReg(GPRList)) { 3850 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3851 return false; 3852 } 3853 } 3854 3855 if (LocVT == MVT::f16) { 3856 static const MCPhysReg FPR16List[] = { 3857 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3858 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3859 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3860 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3861 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3862 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3863 return false; 3864 } 3865 } 3866 3867 if (LocVT == MVT::f32) { 3868 static const MCPhysReg FPR32List[] = { 3869 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3870 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3871 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3872 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3873 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3874 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3875 return false; 3876 } 3877 } 3878 3879 if (LocVT == MVT::f64) { 3880 static const MCPhysReg FPR64List[] = { 3881 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3882 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3883 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3884 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3885 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3886 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3887 return false; 3888 } 3889 } 3890 3891 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3892 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3893 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3894 return false; 3895 } 3896 3897 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3898 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3899 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3900 return false; 3901 } 3902 3903 return true; // CC didn't match. 3904 } 3905 3906 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3907 CCValAssign::LocInfo LocInfo, 3908 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3909 3910 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3911 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3912 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3913 static const MCPhysReg GPRList[] = { 3914 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3915 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3916 if (unsigned Reg = State.AllocateReg(GPRList)) { 3917 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3918 return false; 3919 } 3920 } 3921 3922 if (LocVT == MVT::f32) { 3923 // Pass in STG registers: F1, ..., F6 3924 // fs0 ... 
fs5
3925     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3926                                           RISCV::F18_F, RISCV::F19_F,
3927                                           RISCV::F20_F, RISCV::F21_F};
3928     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3929       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3930       return false;
3931     }
3932   }
3933
3934   if (LocVT == MVT::f64) {
3935     // Pass in STG registers: D1, ..., D6
3936     //                        fs6 ... fs11
3937     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3938                                           RISCV::F24_D, RISCV::F25_D,
3939                                           RISCV::F26_D, RISCV::F27_D};
3940     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3941       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3942       return false;
3943     }
3944   }
3945
3946   report_fatal_error("No registers left in GHC calling convention");
3947   return true;
3948 }
3949
3950 // Transform physical registers into virtual registers.
3951 SDValue RISCVTargetLowering::LowerFormalArguments(
3952     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3953     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3954     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3955
3956   MachineFunction &MF = DAG.getMachineFunction();
3957
3958   switch (CallConv) {
3959   default:
3960     report_fatal_error("Unsupported calling convention");
3961   case CallingConv::C:
3962   case CallingConv::Fast:
3963     break;
3964   case CallingConv::GHC:
3965     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3966         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3967       report_fatal_error(
3968           "GHC calling convention requires the F and D instruction set extensions");
3969   }
3970
3971   const Function &Func = MF.getFunction();
3972   if (Func.hasFnAttribute("interrupt")) {
3973     if (!Func.arg_empty())
3974       report_fatal_error(
3975           "Functions with the interrupt attribute cannot have arguments!");
3976
3977     StringRef Kind =
3978         MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3979
3980     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3981       report_fatal_error(
3982           "Function interrupt attribute argument not supported!");
3983   }
3984
3985   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3986   MVT XLenVT = Subtarget.getXLenVT();
3987   unsigned XLenInBytes = Subtarget.getXLen() / 8;
3988   // Used with varargs to accumulate store chains.
3989   std::vector<SDValue> OutChains;
3990
3991   // Assign locations to all of the incoming arguments.
3992   SmallVector<CCValAssign, 16> ArgLocs;
3993   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3994
3995   if (CallConv == CallingConv::Fast)
3996     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3997   else if (CallConv == CallingConv::GHC)
3998     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3999   else
4000     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
4001
4002   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4003     CCValAssign &VA = ArgLocs[i];
4004     SDValue ArgValue;
4005     // Passing f64 on RV32D with a soft float ABI must be handled as a special
4006     // case.
4007     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
4008       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
4009     else if (VA.isRegLoc())
4010       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
4011     else
4012       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
4013
4014     if (VA.getLocInfo() == CCValAssign::Indirect) {
4015       // If the original argument was split and passed by reference (e.g. i128
4016       // on RV32), we need to load all parts of it here (using the same
4017       // address).
4018       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
4019                                    MachinePointerInfo()));
4020       unsigned ArgIndex = Ins[i].OrigArgIndex;
4021       assert(Ins[i].PartOffset == 0);
4022       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
4023         CCValAssign &PartVA = ArgLocs[i + 1];
4024         unsigned PartOffset = Ins[i + 1].PartOffset;
4025         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
4026                                       DAG.getIntPtrConstant(PartOffset, DL));
4027         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
4028                                      MachinePointerInfo()));
4029         ++i;
4030       }
4031       continue;
4032     }
4033     InVals.push_back(ArgValue);
4034   }
4035
4036   if (IsVarArg) {
4037     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
4038     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
4039     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
4040     MachineFrameInfo &MFI = MF.getFrameInfo();
4041     MachineRegisterInfo &RegInfo = MF.getRegInfo();
4042     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
4043
4044     // Offset of the first variable argument from the stack pointer, and size
4045     // of the vararg save area. For now, the varargs save area is either zero
4046     // or large enough to hold a0-a7.
4047     int VaArgOffset, VarArgsSaveSize;
4048
4049     // If all registers are allocated, then all varargs must be passed on the
4050     // stack and we don't need to save any argument registers.
4051     if (ArgRegs.size() == Idx) {
4052       VaArgOffset = CCInfo.getNextStackOffset();
4053       VarArgsSaveSize = 0;
4054     } else {
4055       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
4056       VaArgOffset = -VarArgsSaveSize;
4057     }
4058
4059     // Record the frame index of the first variable argument,
4060     // which is a value needed by VASTART.
4061     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4062     RVFI->setVarArgsFrameIndex(FI);
4063
4064     // If saving an odd number of registers, create an extra stack slot to
4065     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
4066     // offsets to even-numbered registers remain 2*XLEN-aligned.
4067     if (Idx % 2) {
4068       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
4069       VarArgsSaveSize += XLenInBytes;
4070     }
4071
4072     // Copy the integer registers that may have been used for passing varargs
4073     // to the vararg save area.
4074     for (unsigned I = Idx; I < ArgRegs.size();
4075          ++I, VaArgOffset += XLenInBytes) {
4076       const Register Reg = RegInfo.createVirtualRegister(RC);
4077       RegInfo.addLiveIn(ArgRegs[I], Reg);
4078       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
4079       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4080       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4081       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4082                                    MachinePointerInfo::getFixedStack(MF, FI));
4083       cast<StoreSDNode>(Store.getNode())
4084           ->getMemOperand()
4085           ->setValue((Value *)nullptr);
4086       OutChains.push_back(Store);
4087     }
4088     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
4089   }
4090
4091   // All stores are grouped into one node to allow matching between the sizes
4092   // of Ins and InVals. This only happens for vararg functions.
4093   if (!OutChains.empty()) {
4094     OutChains.push_back(Chain);
4095     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4096   }
4097
4098   return Chain;
4099 }
4100
4101 /// isEligibleForTailCallOptimization - Check whether the call is eligible
4102 /// for tail call optimization.
4103 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 4104 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 4105 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 4106 const SmallVector<CCValAssign, 16> &ArgLocs) const { 4107 4108 auto &Callee = CLI.Callee; 4109 auto CalleeCC = CLI.CallConv; 4110 auto &Outs = CLI.Outs; 4111 auto &Caller = MF.getFunction(); 4112 auto CallerCC = Caller.getCallingConv(); 4113 4114 // Exception-handling functions need a special set of instructions to 4115 // indicate a return to the hardware. Tail-calling another function would 4116 // probably break this. 4117 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 4118 // should be expanded as new function attributes are introduced. 4119 if (Caller.hasFnAttribute("interrupt")) 4120 return false; 4121 4122 // Do not tail call opt if the stack is used to pass parameters. 4123 if (CCInfo.getNextStackOffset() != 0) 4124 return false; 4125 4126 // Do not tail call opt if any parameters need to be passed indirectly. 4127 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 4128 // passed indirectly. So the address of the value will be passed in a 4129 // register, or if not available, then the address is put on the stack. In 4130 // order to pass indirectly, space on the stack often needs to be allocated 4131 // in order to store the value. In this case the CCInfo.getNextStackOffset() 4132 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 4133 // are passed CCValAssign::Indirect. 4134 for (auto &VA : ArgLocs) 4135 if (VA.getLocInfo() == CCValAssign::Indirect) 4136 return false; 4137 4138 // Do not tail call opt if either caller or callee uses struct return 4139 // semantics. 4140 auto IsCallerStructRet = Caller.hasStructRetAttr(); 4141 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 4142 if (IsCallerStructRet || IsCalleeStructRet) 4143 return false; 4144 4145 // Externally-defined functions with weak linkage should not be 4146 // tail-called. The behaviour of branch instructions in this situation (as 4147 // used for tail calls) is implementation-defined, so we cannot rely on the 4148 // linker replacing the tail call with a return. 4149 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4150 const GlobalValue *GV = G->getGlobal(); 4151 if (GV->hasExternalWeakLinkage()) 4152 return false; 4153 } 4154 4155 // The callee has to preserve all registers the caller needs to preserve. 4156 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4157 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 4158 if (CalleeCC != CallerCC) { 4159 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 4160 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 4161 return false; 4162 } 4163 4164 // Byval parameters hand the function a pointer directly into the stack area 4165 // we want to reuse during a tail call. Working around this *is* possible 4166 // but less efficient and uglier in LowerCall. 4167 for (auto &Arg : Outs) 4168 if (Arg.Flags.isByVal()) 4169 return false; 4170 4171 return true; 4172 } 4173 4174 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 4175 // and output parameter nodes. 
4176 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 4177 SmallVectorImpl<SDValue> &InVals) const { 4178 SelectionDAG &DAG = CLI.DAG; 4179 SDLoc &DL = CLI.DL; 4180 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4181 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4182 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4183 SDValue Chain = CLI.Chain; 4184 SDValue Callee = CLI.Callee; 4185 bool &IsTailCall = CLI.IsTailCall; 4186 CallingConv::ID CallConv = CLI.CallConv; 4187 bool IsVarArg = CLI.IsVarArg; 4188 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4189 MVT XLenVT = Subtarget.getXLenVT(); 4190 4191 MachineFunction &MF = DAG.getMachineFunction(); 4192 4193 // Analyze the operands of the call, assigning locations to each operand. 4194 SmallVector<CCValAssign, 16> ArgLocs; 4195 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 4196 4197 if (CallConv == CallingConv::Fast) 4198 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 4199 else if (CallConv == CallingConv::GHC) 4200 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 4201 else 4202 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 4203 4204 // Check if it's really possible to do a tail call. 4205 if (IsTailCall) 4206 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 4207 4208 if (IsTailCall) 4209 ++NumTailCalls; 4210 else if (CLI.CB && CLI.CB->isMustTailCall()) 4211 report_fatal_error("failed to perform tail call elimination on a call " 4212 "site marked musttail"); 4213 4214 // Get a count of how many bytes are to be pushed on the stack. 4215 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 4216 4217 // Create local copies for byval args 4218 SmallVector<SDValue, 8> ByValArgs; 4219 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 4220 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4221 if (!Flags.isByVal()) 4222 continue; 4223 4224 SDValue Arg = OutVals[i]; 4225 unsigned Size = Flags.getByValSize(); 4226 Align Alignment = Flags.getNonZeroByValAlign(); 4227 4228 int FI = 4229 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 4230 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 4231 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 4232 4233 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 4234 /*IsVolatile=*/false, 4235 /*AlwaysInline=*/false, IsTailCall, 4236 MachinePointerInfo(), MachinePointerInfo()); 4237 ByValArgs.push_back(FIPtr); 4238 } 4239 4240 if (!IsTailCall) 4241 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 4242 4243 // Copy argument values to their designated locations. 4244 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 4245 SmallVector<SDValue, 8> MemOpChains; 4246 SDValue StackPtr; 4247 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 4248 CCValAssign &VA = ArgLocs[i]; 4249 SDValue ArgValue = OutVals[i]; 4250 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4251 4252 // Handle passing f64 on RV32D with a soft float ABI as a special case. 4253 bool IsF64OnRV32DSoftABI = 4254 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 4255 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 4256 SDValue SplitF64 = DAG.getNode( 4257 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 4258 SDValue Lo = SplitF64.getValue(0); 4259 SDValue Hi = SplitF64.getValue(1); 4260 4261 Register RegLo = VA.getLocReg(); 4262 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 4263 4264 if (RegLo == RISCV::X17) { 4265 // Second half of f64 is passed on the stack. 
4266 // Work out the address of the stack slot. 4267 if (!StackPtr.getNode()) 4268 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4269 // Emit the store. 4270 MemOpChains.push_back( 4271 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 4272 } else { 4273 // Second half of f64 is passed in another GPR. 4274 assert(RegLo < RISCV::X31 && "Invalid register pair"); 4275 Register RegHigh = RegLo + 1; 4276 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 4277 } 4278 continue; 4279 } 4280 4281 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 4282 // as any other MemLoc. 4283 4284 // Promote the value if needed. 4285 // For now, only handle fully promoted and indirect arguments. 4286 if (VA.getLocInfo() == CCValAssign::Indirect) { 4287 // Store the argument in a stack slot and pass its address. 4288 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 4289 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 4290 MemOpChains.push_back( 4291 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 4292 MachinePointerInfo::getFixedStack(MF, FI))); 4293 // If the original argument was split (e.g. i128), we need 4294 // to store all parts of it here (and pass just one address). 4295 unsigned ArgIndex = Outs[i].OrigArgIndex; 4296 assert(Outs[i].PartOffset == 0); 4297 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 4298 SDValue PartValue = OutVals[i + 1]; 4299 unsigned PartOffset = Outs[i + 1].PartOffset; 4300 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 4301 DAG.getIntPtrConstant(PartOffset, DL)); 4302 MemOpChains.push_back( 4303 DAG.getStore(Chain, DL, PartValue, Address, 4304 MachinePointerInfo::getFixedStack(MF, FI))); 4305 ++i; 4306 } 4307 ArgValue = SpillSlot; 4308 } else { 4309 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 4310 } 4311 4312 // Use local copy if it is a byval arg. 4313 if (Flags.isByVal()) 4314 ArgValue = ByValArgs[j++]; 4315 4316 if (VA.isRegLoc()) { 4317 // Queue up the argument copies and emit them at the end. 4318 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 4319 } else { 4320 assert(VA.isMemLoc() && "Argument not register or memory"); 4321 assert(!IsTailCall && "Tail call not allowed if stack is used " 4322 "for passing parameters"); 4323 4324 // Work out the address of the stack slot. 4325 if (!StackPtr.getNode()) 4326 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 4327 SDValue Address = 4328 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 4329 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 4330 4331 // Emit the store. 4332 MemOpChains.push_back( 4333 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 4334 } 4335 } 4336 4337 // Join the stores, which are independent of one another. 4338 if (!MemOpChains.empty()) 4339 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 4340 4341 SDValue Glue; 4342 4343 // Build a sequence of copy-to-reg nodes, chained and glued together. 4344 for (auto &Reg : RegsToPass) { 4345 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 4346 Glue = Chain.getValue(1); 4347 } 4348 4349 // Validate that none of the argument registers have been marked as 4350 // reserved, if so report an error. Do the same for the return address if this 4351 // is not a tailcall. 
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so that a direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
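  // With an RV32 soft-float ABI an f64 result comes back as two i32 halves in
  // a0/a1; the loop below reassembles it with BuildPairF64.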
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasStdExtV())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
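      // The value is split with SplitF64 and returned in two GPRs: the low
      // half in the assigned register and the high half in the next one.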
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(SPLAT_VECTOR_I64)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR)
  NODE_NAME_CASE(VLEFF)
  NODE_NAME_CASE(VLEFF_MASK)
  NODE_NAME_CASE(VSLIDEUP)
  NODE_NAME_CASE(VSLIDEDOWN)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD)
  NODE_NAME_CASE(VECREDUCE_ADD)
  NODE_NAME_CASE(VECREDUCE_UMAX)
  NODE_NAME_CASE(VECREDUCE_SMAX)
  NODE_NAME_CASE(VECREDUCE_UMIN)
  NODE_NAME_CASE(VECREDUCE_SMIN)
  NODE_NAME_CASE(VECREDUCE_AND)
  NODE_NAME_CASE(VECREDUCE_OR)
  NODE_NAME_CASE(VECREDUCE_XOR)
  NODE_NAME_CASE(VECREDUCE_FADD)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
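  // For example, in GCC-style inline assembly such as
  //   asm volatile("fadd.s %0, %1, %2" : "=f"(res) : "f"(a), "f"(b));
  // the 'f' constraint selects an FPR class matching the operand type, while
  // 'r' selects a GPR.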
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
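  // For example, an explicit register constraint written either as "{f10}" or
  // as "{fa0}" resolves to the same physical register below.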
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
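      // (5-bit unsigned immediates appear, for example, in the CSR immediate
      // instructions such as csrrwi/csrrsi/csrrci.)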
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend. For example, with XLen=32, an i8 field at ShiftAmt=24 needs
  // an extra shift of 32-8-24 = 0 bits, since it already occupies the top
  // byte of the register.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument
  // or return value is an f32 under the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB, e.g. x*3 => (x << 1) + x and
      // x*5 => (x << 2) + x.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
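  // (e.g. a non-power-of-two type such as v3i32 is rejected here for now,
  // even if it would otherwise fit.)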
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm