//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    } else {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);
    };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() && VT.isScalableVector())
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
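  // Note (added for clarity): RISC-V loads and stores address memory as a
  // single base register plus a sign-extended 12-bit immediate, so any other
  // addressing form would need extra address arithmetic and is rejected here.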
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code.
// The CondCode must be one of those supported by the RISC-V ISA
// (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
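    // Note (added for clarity): for example, on RV64 this yields GREVI with
    // immediate 63 for BITREVERSE and immediate 56 for BSWAP (the rev8
    // permutation).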
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
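    // Note (added for clarity): for example, a VLEN of 128 bits gives
    // VLENB = 16 and therefore vscale = 2.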
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  }
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
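  // Note (added for clarity): on RISC-V the return address register reported
  // by RI.getRARegister() is x1 (ra).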
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR lowering");
  SDValue SplatVal = Op.getOperand(0);

  // If we can prove that the value is a sign-extended 32-bit value, lower this
  // as a custom node in order to try and match RVV vector/scalar instructions.
  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
    if (isInt<32>(CVal->getSExtValue()))
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
  }

  if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
      SplatVal.getOperand(0).getValueType() == MVT::i32) {
    return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                       SplatVal.getOperand(0));
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  // vmv.v.x vX, hi
  // vsll.vx vX, vX, /*32*/
  // vmv.v.x vY, lo
  // vsll.vx vY, vY, /*32*/
  // vsrl.vx vY, vY, /*32*/
  // vor.vv vX, vX, vY
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);

  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  if (isNullConstant(Hi))
    return Lo;

  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  if (!Src.getValueType().isVector() ||
      Src.getValueType().getVectorElementType() != MVT::i1)
    return Op;

  // Be careful not to introduce illegal scalar types at this stage, and be
  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
  // illegal and must be expanded. Since we know that the constants are
  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());

  if (!IsRV32E64) {
    SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
    SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
  } else {
    SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
    SplatTrueVal =
        DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
  }

  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  EVT VecVT = Src.getValueType();

  // Be careful not to introduce illegal scalar types at this stage, and be
  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
  // illegal and must be expanded. Since we know that the constants are
  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  if (!IsRV32E64) {
    SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
    SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
  } else {
    SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
    SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
  }

  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);

  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
}

SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
  // first slid down into position, the value is inserted into the first
  // position, and the vector is slid back up. We do this to simplify patterns.
  // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
  if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
    if (isNullConstant(Idx))
      return Op;
    SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
                                    DAG.getUNDEF(VecVT), Vec, Idx);
    SDValue InsertElt0 =
        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
                    DAG.getConstant(0, DL, Subtarget.getXLenVT()));

    return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
  }

  // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
  // is illegal (currently only vXi64 RV32).
  // Since there is no easy way of getting a single element into a vector when
  // XLEN<SEW, we lower the operation to the following sequence:
  // splat vVal, rVal
  // vid.v vVid
  // vmseq.vx mMask, vVid, rIdx
  // vmerge.vvm vDest, vSrc, vVal, mMask
  // This essentially merges the original vector with the inserted element by
  // using a mask whose only set bit is that corresponding to the insert
  // index.
  SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
  SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);

  SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
  auto SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
  SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);

  return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). This is
// done to maintain parity with the legalization of RV32 vXi64.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  if (isNullConstant(Idx))
    return Op;

  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  EVT VecVT = Vec.getValueType();
  SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
                                  DAG.getUNDEF(VecVT), Vec, Idx);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  }
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  unsigned NF = 1;
  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
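  // Note (added for clarity): the fault-only-first loads below also produce a
  // new VL, which is read back through a glued READ_VL node and returned as an
  // extra result. For the segment variants, NF is counted via the fallthrough
  // cases: each case increments NF once, so riscv_vlsegNff[_mask] reaches the
  // shared block with NF == N.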
  case Intrinsic::riscv_vleff: {
    SDLoc DL(Op);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
    SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
                               Op.getOperand(2), Op.getOperand(3));
    VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
    SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
    return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
  }
  case Intrinsic::riscv_vleff_mask: {
    SDLoc DL(Op);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
    SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
                               Op.getOperand(2), Op.getOperand(3),
                               Op.getOperand(4), Op.getOperand(5));
    VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
    SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
    return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
  }
  case Intrinsic::riscv_vlseg8ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg7ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg6ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg5ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg4ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg3ff:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg2ff: {
    NF++;
    SDLoc DL(Op);
    SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
    EVTs.push_back(MVT::Other);
    EVTs.push_back(MVT::Glue);
    SDVTList VTs = DAG.getVTList(EVTs);
    SDValue Load =
        DAG.getNode(RISCVISD::VLSEGFF, DL, VTs, Op.getOperand(0),
                    Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
    VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
    SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
                                 /*Glue*/ Load.getValue(NF + 1));
    SmallVector<SDValue, 8> Results;
    for (unsigned i = 0; i < NF; ++i)
      Results.push_back(Load.getValue(i));
    Results.push_back(ReadVL);
    Results.push_back(Load.getValue(NF)); // Chain.
    return DAG.getMergeValues(Results, DL);
  }
  case Intrinsic::riscv_vlseg8ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg7ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg6ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg5ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg4ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg3ff_mask:
    NF++;
    LLVM_FALLTHROUGH;
  case Intrinsic::riscv_vlseg2ff_mask: {
    NF++;
    SDLoc DL(Op);
    SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
    EVTs.push_back(MVT::Other);
    EVTs.push_back(MVT::Glue);
    SDVTList VTs = DAG.getVTList(EVTs);
    SmallVector<SDValue, 13> LoadOps;
    LoadOps.push_back(Op.getOperand(0)); // Chain.
    LoadOps.push_back(Op.getOperand(1)); // Intrinsic ID.
    for (unsigned i = 0; i < NF; ++i)
      LoadOps.push_back(Op.getOperand(2 + i)); // MaskedOff.
    LoadOps.push_back(Op.getOperand(2 + NF)); // Base.
    LoadOps.push_back(Op.getOperand(3 + NF)); // Mask.
    LoadOps.push_back(Op.getOperand(4 + NF)); // VL.
1552 SDValue Load = DAG.getNode(RISCVISD::VLSEGFF_MASK, DL, VTs, LoadOps); 1553 VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other); 1554 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, 1555 /*Glue*/ Load.getValue(NF + 1)); 1556 SmallVector<SDValue, 8> Results; 1557 for (unsigned i = 0; i < NF; ++i) 1558 Results.push_back(Load.getValue(i)); 1559 Results.push_back(ReadVL); 1560 Results.push_back(Load.getValue(NF)); // Chain. 1561 return DAG.getMergeValues(Results, DL); 1562 } 1563 } 1564 } 1565 1566 // Returns the opcode of the target-specific SDNode that implements the 32-bit 1567 // form of the given Opcode. 1568 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 1569 switch (Opcode) { 1570 default: 1571 llvm_unreachable("Unexpected opcode"); 1572 case ISD::SHL: 1573 return RISCVISD::SLLW; 1574 case ISD::SRA: 1575 return RISCVISD::SRAW; 1576 case ISD::SRL: 1577 return RISCVISD::SRLW; 1578 case ISD::SDIV: 1579 return RISCVISD::DIVW; 1580 case ISD::UDIV: 1581 return RISCVISD::DIVUW; 1582 case ISD::UREM: 1583 return RISCVISD::REMUW; 1584 case ISD::ROTL: 1585 return RISCVISD::ROLW; 1586 case ISD::ROTR: 1587 return RISCVISD::RORW; 1588 case RISCVISD::GREVI: 1589 return RISCVISD::GREVIW; 1590 case RISCVISD::GORCI: 1591 return RISCVISD::GORCIW; 1592 } 1593 } 1594 1595 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 1596 // Because i32 isn't a legal type for RV64, these operations would otherwise 1597 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 1598 // later on because the fact that the operation was originally of type i32 is 1599 // lost. 1600 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 1601 unsigned ExtOpc = ISD::ANY_EXTEND) { 1602 SDLoc DL(N); 1603 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 1604 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 1605 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 1606 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 1607 // ReplaceNodeResults requires we maintain the same type for the return value. 1608 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 1609 } 1610 1611 // Converts the given 32-bit operation to an i64 operation with sign extension 1612 // semantics to reduce the number of sign extension instructions. 1613 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 1614 SDLoc DL(N); 1615 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1616 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 1617 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 1618 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 1619 DAG.getValueType(MVT::i32)); 1620 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 1621 } 1622 1623 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 1624 SmallVectorImpl<SDValue> &Results, 1625 SelectionDAG &DAG) const { 1626 SDLoc DL(N); 1627 switch (N->getOpcode()) { 1628 default: 1629 llvm_unreachable("Don't know how to custom type legalize this operation!"); 1630 case ISD::STRICT_FP_TO_SINT: 1631 case ISD::STRICT_FP_TO_UINT: 1632 case ISD::FP_TO_SINT: 1633 case ISD::FP_TO_UINT: { 1634 bool IsStrict = N->isStrictFPOpcode(); 1635 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1636 "Unexpected custom legalisation"); 1637 SDValue Op0 = IsStrict ?
N->getOperand(1) : N->getOperand(0); 1638 // If the FP type needs to be softened, emit a library call using the 'si' 1639 // version. If we left it to default legalization we'd end up with 'di'. If 1640 // the FP type doesn't need to be softened just let generic type 1641 // legalization promote the result type. 1642 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 1643 TargetLowering::TypeSoftenFloat) 1644 return; 1645 RTLIB::Libcall LC; 1646 if (N->getOpcode() == ISD::FP_TO_SINT || 1647 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 1648 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 1649 else 1650 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 1651 MakeLibCallOptions CallOptions; 1652 EVT OpVT = Op0.getValueType(); 1653 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 1654 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 1655 SDValue Result; 1656 std::tie(Result, Chain) = 1657 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 1658 Results.push_back(Result); 1659 if (IsStrict) 1660 Results.push_back(Chain); 1661 break; 1662 } 1663 case ISD::READCYCLECOUNTER: { 1664 assert(!Subtarget.is64Bit() && 1665 "READCYCLECOUNTER only has custom type legalization on riscv32"); 1666 1667 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 1668 SDValue RCW = 1669 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 1670 1671 Results.push_back( 1672 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 1673 Results.push_back(RCW.getValue(2)); 1674 break; 1675 } 1676 case ISD::ADD: 1677 case ISD::SUB: 1678 case ISD::MUL: 1679 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1680 "Unexpected custom legalisation"); 1681 if (N->getOperand(1).getOpcode() == ISD::Constant) 1682 return; 1683 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 1684 break; 1685 case ISD::SHL: 1686 case ISD::SRA: 1687 case ISD::SRL: 1688 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1689 "Unexpected custom legalisation"); 1690 if (N->getOperand(1).getOpcode() == ISD::Constant) 1691 return; 1692 Results.push_back(customLegalizeToWOp(N, DAG)); 1693 break; 1694 case ISD::ROTL: 1695 case ISD::ROTR: 1696 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1697 "Unexpected custom legalisation"); 1698 Results.push_back(customLegalizeToWOp(N, DAG)); 1699 break; 1700 case ISD::SDIV: 1701 case ISD::UDIV: 1702 case ISD::UREM: { 1703 MVT VT = N->getSimpleValueType(0); 1704 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 1705 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 1706 "Unexpected custom legalisation"); 1707 if (N->getOperand(0).getOpcode() == ISD::Constant || 1708 N->getOperand(1).getOpcode() == ISD::Constant) 1709 return; 1710 1711 // If the input is i32, use ANY_EXTEND since the W instructions don't read 1712 // the upper 32 bits. For other types we need to sign or zero extend 1713 // based on the opcode. 1714 unsigned ExtOpc = ISD::ANY_EXTEND; 1715 if (VT != MVT::i32) 1716 ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND 1717 : ISD::ZERO_EXTEND; 1718 1719 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 1720 break; 1721 } 1722 case ISD::BITCAST: { 1723 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1724 Subtarget.hasStdExtF()) || 1725 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 1726 "Unexpected custom legalisation"); 1727 SDValue Op0 = N->getOperand(0); 1728 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 1729 if (Op0.getValueType() != MVT::f16) 1730 return; 1731 SDValue FPConv = 1732 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 1733 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 1734 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1735 Subtarget.hasStdExtF()) { 1736 if (Op0.getValueType() != MVT::f32) 1737 return; 1738 SDValue FPConv = 1739 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 1740 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 1741 } 1742 break; 1743 } 1744 case RISCVISD::GREVI: 1745 case RISCVISD::GORCI: { 1746 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1747 "Unexpected custom legalisation"); 1748 // This is similar to customLegalizeToWOp, except that we pass the second 1749 // operand (a TargetConstant) straight through: it is already of type 1750 // XLenVT. 1751 SDLoc DL(N); 1752 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 1753 SDValue NewOp0 = 1754 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1755 SDValue NewRes = 1756 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 1757 // ReplaceNodeResults requires we maintain the same type for the return 1758 // value. 1759 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 1760 break; 1761 } 1762 case ISD::BSWAP: 1763 case ISD::BITREVERSE: { 1764 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1765 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 1766 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 1767 N->getOperand(0)); 1768 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 1769 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 1770 DAG.getTargetConstant(Imm, DL, 1771 Subtarget.getXLenVT())); 1772 // ReplaceNodeResults requires we maintain the same type for the return 1773 // value. 1774 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 1775 break; 1776 } 1777 case ISD::FSHL: 1778 case ISD::FSHR: { 1779 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1780 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 1781 SDValue NewOp0 = 1782 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1783 SDValue NewOp1 = 1784 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 1785 SDValue NewOp2 = 1786 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 1787 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 1788 // Mask the shift amount to 5 bits. 1789 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 1790 DAG.getConstant(0x1f, DL, MVT::i64)); 1791 unsigned Opc = 1792 N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; 1793 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 1794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 1795 break; 1796 } 1797 case ISD::EXTRACT_VECTOR_ELT: { 1798 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 1799 // type is illegal (currently only vXi64 RV32). 1800 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 1801 // transferred to the destination register. We issue two of these from the 1802 // upper- and lower- halves of the SEW-bit vector element, slid down to the 1803 // first element. 1804 SDLoc DL(N); 1805 SDValue Vec = N->getOperand(0); 1806 SDValue Idx = N->getOperand(1); 1807 EVT VecVT = Vec.getValueType(); 1808 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 1809 VecVT.getVectorElementType() == MVT::i64 && 1810 "Unexpected EXTRACT_VECTOR_ELT legalization"); 1811 1812 SDValue Slidedown = Vec; 1813 // Unless the index is known to be 0, we must slide the vector down to get 1814 // the desired element into index 0. 1815 if (!isNullConstant(Idx)) 1816 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, 1817 DAG.getUNDEF(VecVT), Vec, Idx); 1818 1819 MVT XLenVT = Subtarget.getXLenVT(); 1820 // Extract the lower XLEN bits of the correct vector element. 1821 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); 1822 1823 // To extract the upper XLEN bits of the vector element, shift the first 1824 // element right by 32 bits and re-extract the lower XLEN bits. 1825 SDValue ThirtyTwoV = 1826 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, 1827 DAG.getConstant(32, DL, Subtarget.getXLenVT())); 1828 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); 1829 1830 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); 1831 1832 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 1833 break; 1834 } 1835 case ISD::INTRINSIC_WO_CHAIN: { 1836 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 1837 switch (IntNo) { 1838 default: 1839 llvm_unreachable( 1840 "Don't know how to custom type legalize this intrinsic!"); 1841 case Intrinsic::riscv_vmv_x_s: { 1842 EVT VT = N->getValueType(0); 1843 assert((VT == MVT::i8 || VT == MVT::i16 || 1844 (Subtarget.is64Bit() && VT == MVT::i32)) && 1845 "Unexpected custom legalisation!"); 1846 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 1847 Subtarget.getXLenVT(), N->getOperand(1)); 1848 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 1849 break; 1850 } 1851 } 1852 break; 1853 } 1854 } 1855 } 1856 1857 // A structure to hold one of the bit-manipulation patterns below. 
Together, a 1858 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 1859 // (or (and (shl x, 1), 0xAAAAAAAA), 1860 // (and (srl x, 1), 0x55555555)) 1861 struct RISCVBitmanipPat { 1862 SDValue Op; 1863 unsigned ShAmt; 1864 bool IsSHL; 1865 1866 bool formsPairWith(const RISCVBitmanipPat &Other) const { 1867 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 1868 } 1869 }; 1870 1871 // Matches any of the following bit-manipulation patterns: 1872 // (and (shl x, 1), (0x55555555 << 1)) 1873 // (and (srl x, 1), 0x55555555) 1874 // (shl (and x, 0x55555555), 1) 1875 // (srl (and x, (0x55555555 << 1)), 1) 1876 // where the shift amount and mask may vary thus: 1877 // [1] = 0x55555555 / 0xAAAAAAAA 1878 // [2] = 0x33333333 / 0xCCCCCCCC 1879 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 1880 // [8] = 0x00FF00FF / 0xFF00FF00 1881 // [16] = 0x0000FFFF / 0xFFFF0000 1882 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 1883 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) { 1884 Optional<uint64_t> Mask; 1885 // Optionally consume a mask around the shift operation. 1886 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 1887 Mask = Op.getConstantOperandVal(1); 1888 Op = Op.getOperand(0); 1889 } 1890 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 1891 return None; 1892 bool IsSHL = Op.getOpcode() == ISD::SHL; 1893 1894 if (!isa<ConstantSDNode>(Op.getOperand(1))) 1895 return None; 1896 auto ShAmt = Op.getConstantOperandVal(1); 1897 1898 if (!isPowerOf2_64(ShAmt)) 1899 return None; 1900 1901 // These are the unshifted masks which we use to match bit-manipulation 1902 // patterns. They may be shifted left in certain circumstances. 1903 static const uint64_t BitmanipMasks[] = { 1904 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 1905 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL, 1906 }; 1907 1908 unsigned MaskIdx = Log2_64(ShAmt); 1909 if (MaskIdx >= array_lengthof(BitmanipMasks)) 1910 return None; 1911 1912 auto Src = Op.getOperand(0); 1913 1914 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 1915 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 1916 1917 // The expected mask is shifted left when the AND is found around SHL 1918 // patterns. 1919 // ((x >> 1) & 0x55555555) 1920 // ((x << 1) & 0xAAAAAAAA) 1921 bool SHLExpMask = IsSHL; 1922 1923 if (!Mask) { 1924 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 1925 // the mask is all ones: consume that now. 1926 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 1927 Mask = Src.getConstantOperandVal(1); 1928 Src = Src.getOperand(0); 1929 // The expected mask is now in fact shifted left for SRL, so reverse the 1930 // decision. 1931 // ((x & 0xAAAAAAAA) >> 1) 1932 // ((x & 0x55555555) << 1) 1933 SHLExpMask = !SHLExpMask; 1934 } else { 1935 // Use a default shifted mask of all-ones if there's no AND, truncated 1936 // down to the expected width. This simplifies the logic later on. 1937 Mask = maskTrailingOnes<uint64_t>(Width); 1938 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt); 1939 } 1940 } 1941 1942 if (SHLExpMask) 1943 ExpMask <<= ShAmt; 1944 1945 if (Mask != ExpMask) 1946 return None; 1947 1948 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 1949 } 1950 1951 // Match the following pattern as a GREVI(W) operation 1952 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 1953 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 1954 const RISCVSubtarget &Subtarget) { 1955 EVT VT = Op.getValueType(); 1956 1957 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 1958 auto LHS = matchRISCVBitmanipPat(Op.getOperand(0)); 1959 auto RHS = matchRISCVBitmanipPat(Op.getOperand(1)); 1960 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 1961 SDLoc DL(Op); 1962 return DAG.getNode( 1963 RISCVISD::GREVI, DL, VT, LHS->Op, 1964 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 1965 } 1966 } 1967 return SDValue(); 1968 } 1969 1970 // Matches any of the following patterns as a GORCI(W) operation 1971 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 1972 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 1973 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 1974 // Note that with the variant of 3., 1975 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 1976 // the inner pattern will first be matched as GREVI and then the outer 1977 // pattern will be matched to GORC via the first rule above. 1978 // 4. (or (rotl/rotr x, bitwidth/2), x) 1979 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 1980 const RISCVSubtarget &Subtarget) { 1981 EVT VT = Op.getValueType(); 1982 1983 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 1984 SDLoc DL(Op); 1985 SDValue Op0 = Op.getOperand(0); 1986 SDValue Op1 = Op.getOperand(1); 1987 1988 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 1989 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && 1990 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 1991 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); 1992 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
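    // For example, on a 32-bit type (or (rotl x, 16), x) ORs each half-word
    // of x with the opposite half-word, which is exactly what GORCI with a
    // shift amount of 16 computes, so the rotate-plus-OR form below can be
    // folded the same way.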
1993 if ((Reverse.getOpcode() == ISD::ROTL || 1994 Reverse.getOpcode() == ISD::ROTR) && 1995 Reverse.getOperand(0) == X && 1996 isa<ConstantSDNode>(Reverse.getOperand(1))) { 1997 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 1998 if (RotAmt == (VT.getSizeInBits() / 2)) 1999 return DAG.getNode( 2000 RISCVISD::GORCI, DL, VT, X, 2001 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 2002 } 2003 return SDValue(); 2004 }; 2005 2006 // Check for either commutable permutation of (or (GREVI x, shamt), x) 2007 if (SDValue V = MatchOROfReverse(Op0, Op1)) 2008 return V; 2009 if (SDValue V = MatchOROfReverse(Op1, Op0)) 2010 return V; 2011 2012 // OR is commutable so canonicalize its OR operand to the left 2013 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 2014 std::swap(Op0, Op1); 2015 if (Op0.getOpcode() != ISD::OR) 2016 return SDValue(); 2017 SDValue OrOp0 = Op0.getOperand(0); 2018 SDValue OrOp1 = Op0.getOperand(1); 2019 auto LHS = matchRISCVBitmanipPat(OrOp0); 2020 // OR is commutable so swap the operands and try again: x might have been 2021 // on the left 2022 if (!LHS) { 2023 std::swap(OrOp0, OrOp1); 2024 LHS = matchRISCVBitmanipPat(OrOp0); 2025 } 2026 auto RHS = matchRISCVBitmanipPat(Op1); 2027 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 2028 return DAG.getNode( 2029 RISCVISD::GORCI, DL, VT, LHS->Op, 2030 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 2031 } 2032 } 2033 return SDValue(); 2034 } 2035 2036 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 2037 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 2038 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 2039 // not undo itself, but they are redundant. 2040 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 2041 unsigned ShAmt1 = N->getConstantOperandVal(1); 2042 SDValue Src = N->getOperand(0); 2043 2044 if (Src.getOpcode() != N->getOpcode()) 2045 return SDValue(); 2046 2047 unsigned ShAmt2 = Src.getConstantOperandVal(1); 2048 Src = Src.getOperand(0); 2049 2050 unsigned CombinedShAmt; 2051 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 2052 CombinedShAmt = ShAmt1 | ShAmt2; 2053 else 2054 CombinedShAmt = ShAmt1 ^ ShAmt2; 2055 2056 if (CombinedShAmt == 0) 2057 return Src; 2058 2059 SDLoc DL(N); 2060 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 2061 DAG.getTargetConstant(CombinedShAmt, DL, 2062 N->getOperand(1).getValueType())); 2063 } 2064 2065 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 2066 DAGCombinerInfo &DCI) const { 2067 SelectionDAG &DAG = DCI.DAG; 2068 2069 switch (N->getOpcode()) { 2070 default: 2071 break; 2072 case RISCVISD::SplitF64: { 2073 SDValue Op0 = N->getOperand(0); 2074 // If the input to SplitF64 is just BuildPairF64 then the operation is 2075 // redundant. Instead, use BuildPairF64's operands directly. 2076 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 2077 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 2078 2079 SDLoc DL(N); 2080 2081 // It's cheaper to materialise two 32-bit integers than to load a double 2082 // from the constant pool and transfer it to integer registers through the 2083 // stack. 
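    // For example, SplitF64 of the f64 constant 1.0 (bit pattern
    // 0x3FF0000000000000) becomes the pair Lo = 0x00000000 and
    // Hi = 0x3FF00000, both of which are cheap to materialise in GPRs.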
2084 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 2085 APInt V = C->getValueAPF().bitcastToAPInt(); 2086 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 2087 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 2088 return DCI.CombineTo(N, Lo, Hi); 2089 } 2090 2091 // This is a target-specific version of a DAGCombine performed in 2092 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2093 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2094 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2095 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2096 !Op0.getNode()->hasOneUse()) 2097 break; 2098 SDValue NewSplitF64 = 2099 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 2100 Op0.getOperand(0)); 2101 SDValue Lo = NewSplitF64.getValue(0); 2102 SDValue Hi = NewSplitF64.getValue(1); 2103 APInt SignBit = APInt::getSignMask(32); 2104 if (Op0.getOpcode() == ISD::FNEG) { 2105 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 2106 DAG.getConstant(SignBit, DL, MVT::i32)); 2107 return DCI.CombineTo(N, Lo, NewHi); 2108 } 2109 assert(Op0.getOpcode() == ISD::FABS); 2110 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 2111 DAG.getConstant(~SignBit, DL, MVT::i32)); 2112 return DCI.CombineTo(N, Lo, NewHi); 2113 } 2114 case RISCVISD::SLLW: 2115 case RISCVISD::SRAW: 2116 case RISCVISD::SRLW: 2117 case RISCVISD::ROLW: 2118 case RISCVISD::RORW: { 2119 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 2120 SDValue LHS = N->getOperand(0); 2121 SDValue RHS = N->getOperand(1); 2122 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 2123 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 2124 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 2125 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 2126 if (N->getOpcode() != ISD::DELETED_NODE) 2127 DCI.AddToWorklist(N); 2128 return SDValue(N, 0); 2129 } 2130 break; 2131 } 2132 case RISCVISD::FSLW: 2133 case RISCVISD::FSRW: { 2134 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 2135 // read. 2136 SDValue Op0 = N->getOperand(0); 2137 SDValue Op1 = N->getOperand(1); 2138 SDValue ShAmt = N->getOperand(2); 2139 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2140 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 2141 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 2142 SimplifyDemandedBits(Op1, OpMask, DCI) || 2143 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 2144 if (N->getOpcode() != ISD::DELETED_NODE) 2145 DCI.AddToWorklist(N); 2146 return SDValue(N, 0); 2147 } 2148 break; 2149 } 2150 case RISCVISD::GREVIW: 2151 case RISCVISD::GORCIW: { 2152 // Only the lower 32 bits of the first operand are read 2153 SDValue Op0 = N->getOperand(0); 2154 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 2155 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 2156 if (N->getOpcode() != ISD::DELETED_NODE) 2157 DCI.AddToWorklist(N); 2158 return SDValue(N, 0); 2159 } 2160 2161 return combineGREVI_GORCI(N, DCI.DAG); 2162 } 2163 case RISCVISD::FMV_X_ANYEXTW_RV64: { 2164 SDLoc DL(N); 2165 SDValue Op0 = N->getOperand(0); 2166 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 2167 // conversion is unnecessary and can be replaced with an ANY_EXTEND 2168 // of the FMV_W_X_RV64 operand. 
2169 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 2170 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 2171 "Unexpected value type!"); 2172 return Op0.getOperand(0); 2173 } 2174 2175 // This is a target-specific version of a DAGCombine performed in 2176 // DAGCombiner::visitBITCAST. It performs the equivalent of: 2177 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 2178 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 2179 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 2180 !Op0.getNode()->hasOneUse()) 2181 break; 2182 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 2183 Op0.getOperand(0)); 2184 APInt SignBit = APInt::getSignMask(32).sext(64); 2185 if (Op0.getOpcode() == ISD::FNEG) 2186 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 2187 DAG.getConstant(SignBit, DL, MVT::i64)); 2188 2189 assert(Op0.getOpcode() == ISD::FABS); 2190 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 2191 DAG.getConstant(~SignBit, DL, MVT::i64)); 2192 } 2193 case RISCVISD::GREVI: 2194 case RISCVISD::GORCI: 2195 return combineGREVI_GORCI(N, DCI.DAG); 2196 case ISD::OR: 2197 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 2198 return GREV; 2199 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 2200 return GORC; 2201 break; 2202 case RISCVISD::SELECT_CC: { 2203 // Transform 2204 // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> 2205 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 2206 // This can occur when legalizing some floating point comparisons. 2207 SDValue LHS = N->getOperand(0); 2208 SDValue RHS = N->getOperand(1); 2209 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); 2210 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2211 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && 2212 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && 2213 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { 2214 SDLoc DL(N); 2215 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 2216 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); 2217 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 2218 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), 2219 N->getOperand(4)}); 2220 } 2221 break; 2222 } 2223 case ISD::SETCC: { 2224 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. 2225 // Comparing with 0 may allow us to fold into bnez/beqz. 
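    // For example, if X is itself a setcc (and therefore known to be 0 or 1),
    // (setcc X, 1, setne) becomes (setcc X, 0, seteq), which can later be
    // selected as a single beqz when used as a branch condition.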
2226 SDValue LHS = N->getOperand(0); 2227 SDValue RHS = N->getOperand(1); 2228 if (LHS.getValueType().isScalableVector()) 2229 break; 2230 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2231 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 2232 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && 2233 DAG.MaskedValueIsZero(LHS, Mask)) { 2234 SDLoc DL(N); 2235 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); 2236 CC = ISD::getSetCCInverse(CC, LHS.getValueType()); 2237 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); 2238 } 2239 break; 2240 } 2241 } 2242 2243 return SDValue(); 2244 } 2245 2246 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 2247 const SDNode *N, CombineLevel Level) const { 2248 // The following folds are only desirable if `(OP _, c1 << c2)` can be 2249 // materialised in fewer instructions than `(OP _, c1)`: 2250 // 2251 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 2252 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 2253 SDValue N0 = N->getOperand(0); 2254 EVT Ty = N0.getValueType(); 2255 if (Ty.isScalarInteger() && 2256 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 2257 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2258 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2259 if (C1 && C2) { 2260 const APInt &C1Int = C1->getAPIntValue(); 2261 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 2262 2263 // We can materialise `c1 << c2` into an add immediate, so it's "free", 2264 // and the combine should happen, to potentially allow further combines 2265 // later. 2266 if (ShiftedC1Int.getMinSignedBits() <= 64 && 2267 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 2268 return true; 2269 2270 // We can materialise `c1` in an add immediate, so it's "free", and the 2271 // combine should be prevented. 2272 if (C1Int.getMinSignedBits() <= 64 && 2273 isLegalAddImmediate(C1Int.getSExtValue())) 2274 return false; 2275 2276 // Neither constant will fit into an immediate, so find materialisation 2277 // costs. 2278 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 2279 Subtarget.is64Bit()); 2280 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 2281 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 2282 2283 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 2284 // combine should be prevented. 2285 if (C1Cost < ShiftedC1Cost) 2286 return false; 2287 } 2288 } 2289 return true; 2290 } 2291 2292 bool RISCVTargetLowering::targetShrinkDemandedConstant( 2293 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 2294 TargetLoweringOpt &TLO) const { 2295 // Delay this optimization as late as possible. 2296 if (!TLO.LegalOps) 2297 return false; 2298 2299 EVT VT = Op.getValueType(); 2300 if (VT.isVector()) 2301 return false; 2302 2303 // Only handle AND for now. 2304 if (Op.getOpcode() != ISD::AND) 2305 return false; 2306 2307 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 2308 if (!C) 2309 return false; 2310 2311 const APInt &Mask = C->getAPIntValue(); 2312 2313 // Clear all non-demanded bits initially. 2314 APInt ShrunkMask = Mask & DemandedBits; 2315 2316 // If the shrunk mask fits in sign extended 12 bits, let the target 2317 // independent code apply it. 2318 if (ShrunkMask.isSignedIntN(12)) 2319 return false; 2320 2321 // Try to make a smaller immediate by setting undemanded bits. 2322 2323 // We need to be able to make a negative number through a combination of mask 2324 // and undemanded bits. 
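  // Worked example, assuming a 32-bit AND: Mask = 0x0000ff00 with
  // DemandedBits = 0x0000ffff gives ShrunkMask = 0x0000ff00, which does not
  // fit in a signed 12-bit immediate. ExpandedMask below is 0xffffff00, which
  // is negative, so setting the undemanded upper bits turns the constant into
  // 0xffffff00 (-256), small enough for a single ANDI.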
2325 APInt ExpandedMask = Mask | ~DemandedBits; 2326 if (!ExpandedMask.isNegative()) 2327 return false; 2328 2329 // What is the fewest number of bits we need to represent the negative number. 2330 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 2331 2332 // Try to make a 12 bit negative immediate. If that fails try to make a 32 2333 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 2334 APInt NewMask = ShrunkMask; 2335 if (MinSignedBits <= 12) 2336 NewMask.setBitsFrom(11); 2337 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 2338 NewMask.setBitsFrom(31); 2339 else 2340 return false; 2341 2342 // Sanity check that our new mask is a subset of the demanded mask. 2343 assert(NewMask.isSubsetOf(ExpandedMask)); 2344 2345 // If we aren't changing the mask, just return true to keep it and prevent 2346 // the caller from optimizing. 2347 if (NewMask == Mask) 2348 return true; 2349 2350 // Replace the constant with the new mask. 2351 SDLoc DL(Op); 2352 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 2353 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 2354 return TLO.CombineTo(Op, NewOp); 2355 } 2356 2357 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 2358 KnownBits &Known, 2359 const APInt &DemandedElts, 2360 const SelectionDAG &DAG, 2361 unsigned Depth) const { 2362 unsigned BitWidth = Known.getBitWidth(); 2363 unsigned Opc = Op.getOpcode(); 2364 assert((Opc >= ISD::BUILTIN_OP_END || 2365 Opc == ISD::INTRINSIC_WO_CHAIN || 2366 Opc == ISD::INTRINSIC_W_CHAIN || 2367 Opc == ISD::INTRINSIC_VOID) && 2368 "Should use MaskedValueIsZero if you don't know whether Op" 2369 " is a target node!"); 2370 2371 Known.resetAll(); 2372 switch (Opc) { 2373 default: break; 2374 case RISCVISD::REMUW: { 2375 KnownBits Known2; 2376 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2377 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2378 // We only care about the lower 32 bits. 2379 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 2380 // Restore the original width by sign extending. 2381 Known = Known.sext(BitWidth); 2382 break; 2383 } 2384 case RISCVISD::DIVUW: { 2385 KnownBits Known2; 2386 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 2387 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 2388 // We only care about the lower 32 bits. 2389 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 2390 // Restore the original width by sign extending. 2391 Known = Known.sext(BitWidth); 2392 break; 2393 } 2394 case RISCVISD::READ_VLENB: 2395 // We assume VLENB is at least 8 bytes. 2396 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. 2397 Known.Zero.setLowBits(3); 2398 break; 2399 } 2400 } 2401 2402 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 2403 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 2404 unsigned Depth) const { 2405 switch (Op.getOpcode()) { 2406 default: 2407 break; 2408 case RISCVISD::SLLW: 2409 case RISCVISD::SRAW: 2410 case RISCVISD::SRLW: 2411 case RISCVISD::DIVW: 2412 case RISCVISD::DIVUW: 2413 case RISCVISD::REMUW: 2414 case RISCVISD::ROLW: 2415 case RISCVISD::RORW: 2416 case RISCVISD::GREVIW: 2417 case RISCVISD::GORCIW: 2418 case RISCVISD::FSLW: 2419 case RISCVISD::FSRW: 2420 // TODO: As the result is sign-extended, this is conservatively correct. 
A 2421 // more precise answer could be calculated for SRAW depending on known 2422 // bits in the shift amount. 2423 return 33; 2424 case RISCVISD::VMV_X_S: 2425 // The number of sign bits of the scalar result is computed by obtaining the 2426 // element type of the input vector operand, subtracting its width from the 2427 // XLEN, and then adding one (sign bit within the element type). If the 2428 // element type is wider than XLen, the least-significant XLEN bits are 2429 // taken. 2430 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 2431 return 1; 2432 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 2433 } 2434 2435 return 1; 2436 } 2437 2438 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 2439 MachineBasicBlock *BB) { 2440 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 2441 2442 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 2443 // Should the count have wrapped while it was being read, we need to try 2444 // again. 2445 // ... 2446 // read: 2447 // rdcycleh x3 # load high word of cycle 2448 // rdcycle x2 # load low word of cycle 2449 // rdcycleh x4 # load high word of cycle 2450 // bne x3, x4, read # check if high word reads match, otherwise try again 2451 // ... 2452 2453 MachineFunction &MF = *BB->getParent(); 2454 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2455 MachineFunction::iterator It = ++BB->getIterator(); 2456 2457 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2458 MF.insert(It, LoopMBB); 2459 2460 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 2461 MF.insert(It, DoneMBB); 2462 2463 // Transfer the remainder of BB and its successor edges to DoneMBB. 2464 DoneMBB->splice(DoneMBB->begin(), BB, 2465 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 2466 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 2467 2468 BB->addSuccessor(LoopMBB); 2469 2470 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2471 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 2472 Register LoReg = MI.getOperand(0).getReg(); 2473 Register HiReg = MI.getOperand(1).getReg(); 2474 DebugLoc DL = MI.getDebugLoc(); 2475 2476 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 2477 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 2478 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2479 .addReg(RISCV::X0); 2480 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 2481 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 2482 .addReg(RISCV::X0); 2483 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 2484 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 2485 .addReg(RISCV::X0); 2486 2487 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 2488 .addReg(HiReg) 2489 .addReg(ReadAgainReg) 2490 .addMBB(LoopMBB); 2491 2492 LoopMBB->addSuccessor(LoopMBB); 2493 LoopMBB->addSuccessor(DoneMBB); 2494 2495 MI.eraseFromParent(); 2496 2497 return DoneMBB; 2498 } 2499 2500 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 2501 MachineBasicBlock *BB) { 2502 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 2503 2504 MachineFunction &MF = *BB->getParent(); 2505 DebugLoc DL = MI.getDebugLoc(); 2506 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2507 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2508 Register LoReg = MI.getOperand(0).getReg(); 2509 Register HiReg = MI.getOperand(1).getReg(); 2510 Register 
SrcReg = MI.getOperand(2).getReg(); 2511 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 2512 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2513 2514 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 2515 RI); 2516 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2517 MachineMemOperand *MMOLo = 2518 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 2519 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2520 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 2521 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 2522 .addFrameIndex(FI) 2523 .addImm(0) 2524 .addMemOperand(MMOLo); 2525 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 2526 .addFrameIndex(FI) 2527 .addImm(4) 2528 .addMemOperand(MMOHi); 2529 MI.eraseFromParent(); // The pseudo instruction is gone now. 2530 return BB; 2531 } 2532 2533 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 2534 MachineBasicBlock *BB) { 2535 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 2536 "Unexpected instruction"); 2537 2538 MachineFunction &MF = *BB->getParent(); 2539 DebugLoc DL = MI.getDebugLoc(); 2540 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2541 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 2542 Register DstReg = MI.getOperand(0).getReg(); 2543 Register LoReg = MI.getOperand(1).getReg(); 2544 Register HiReg = MI.getOperand(2).getReg(); 2545 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 2546 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 2547 2548 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 2549 MachineMemOperand *MMOLo = 2550 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 2551 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 2552 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 2553 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2554 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 2555 .addFrameIndex(FI) 2556 .addImm(0) 2557 .addMemOperand(MMOLo); 2558 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 2559 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 2560 .addFrameIndex(FI) 2561 .addImm(4) 2562 .addMemOperand(MMOHi); 2563 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 2564 MI.eraseFromParent(); // The pseudo instruction is gone now. 2565 return BB; 2566 } 2567 2568 static bool isSelectPseudo(MachineInstr &MI) { 2569 switch (MI.getOpcode()) { 2570 default: 2571 return false; 2572 case RISCV::Select_GPR_Using_CC_GPR: 2573 case RISCV::Select_FPR16_Using_CC_GPR: 2574 case RISCV::Select_FPR32_Using_CC_GPR: 2575 case RISCV::Select_FPR64_Using_CC_GPR: 2576 return true; 2577 } 2578 } 2579 2580 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 2581 MachineBasicBlock *BB) { 2582 // To "insert" Select_* instructions, we actually have to insert the triangle 2583 // control-flow pattern. The incoming instructions know the destination vreg 2584 // to set, the condition code register to branch on, the true/false values to 2585 // select between, and the condcode to use to select the appropriate branch. 2586 // 2587 // We produce the following control flow: 2588 // HeadMBB 2589 // | \ 2590 // | IfFalseMBB 2591 // | / 2592 // TailMBB 2593 // 2594 // When we find a sequence of selects we attempt to optimize their emission 2595 // by sharing the control flow. 
Currently we only handle cases where we have 2596 // multiple selects with the exact same condition (same LHS, RHS and CC). 2597 // The selects may be interleaved with other instructions if the other 2598 // instructions meet some requirements we deem safe: 2599 // - They are debug instructions. Otherwise, 2600 // - They do not have side-effects, do not access memory and their inputs do 2601 // not depend on the results of the select pseudo-instructions. 2602 // The TrueV/FalseV operands of the selects cannot depend on the result of 2603 // previous selects in the sequence. 2604 // These conditions could be further relaxed. See the X86 target for a 2605 // related approach and more information. 2606 Register LHS = MI.getOperand(1).getReg(); 2607 Register RHS = MI.getOperand(2).getReg(); 2608 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 2609 2610 SmallVector<MachineInstr *, 4> SelectDebugValues; 2611 SmallSet<Register, 4> SelectDests; 2612 SelectDests.insert(MI.getOperand(0).getReg()); 2613 2614 MachineInstr *LastSelectPseudo = &MI; 2615 2616 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 2617 SequenceMBBI != E; ++SequenceMBBI) { 2618 if (SequenceMBBI->isDebugInstr()) 2619 continue; 2620 else if (isSelectPseudo(*SequenceMBBI)) { 2621 if (SequenceMBBI->getOperand(1).getReg() != LHS || 2622 SequenceMBBI->getOperand(2).getReg() != RHS || 2623 SequenceMBBI->getOperand(3).getImm() != CC || 2624 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 2625 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 2626 break; 2627 LastSelectPseudo = &*SequenceMBBI; 2628 SequenceMBBI->collectDebugValues(SelectDebugValues); 2629 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 2630 } else { 2631 if (SequenceMBBI->hasUnmodeledSideEffects() || 2632 SequenceMBBI->mayLoadOrStore()) 2633 break; 2634 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 2635 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 2636 })) 2637 break; 2638 } 2639 } 2640 2641 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 2642 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2643 DebugLoc DL = MI.getDebugLoc(); 2644 MachineFunction::iterator I = ++BB->getIterator(); 2645 2646 MachineBasicBlock *HeadMBB = BB; 2647 MachineFunction *F = BB->getParent(); 2648 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 2649 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 2650 2651 F->insert(I, IfFalseMBB); 2652 F->insert(I, TailMBB); 2653 2654 // Transfer debug instructions associated with the selects to TailMBB. 2655 for (MachineInstr *DebugInstr : SelectDebugValues) { 2656 TailMBB->push_back(DebugInstr->removeFromParent()); 2657 } 2658 2659 // Move all instructions after the sequence to TailMBB. 2660 TailMBB->splice(TailMBB->end(), HeadMBB, 2661 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 2662 // Update machine-CFG edges by transferring all successors of the current 2663 // block to the new block which will contain the Phi nodes for the selects. 2664 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 2665 // Set the successors for HeadMBB. 2666 HeadMBB->addSuccessor(IfFalseMBB); 2667 HeadMBB->addSuccessor(TailMBB); 2668 2669 // Insert appropriate branch. 2670 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 2671 2672 BuildMI(HeadMBB, DL, TII.get(Opcode)) 2673 .addReg(LHS) 2674 .addReg(RHS) 2675 .addMBB(TailMBB); 2676 2677 // IfFalseMBB just falls through to TailMBB. 
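  // No unconditional branch is emitted here: IfFalseMBB was inserted
  // immediately before TailMBB above, so it reaches TailMBB by falling
  // through.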
2678 IfFalseMBB->addSuccessor(TailMBB); 2679 2680 // Create PHIs for all of the select pseudo-instructions. 2681 auto SelectMBBI = MI.getIterator(); 2682 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 2683 auto InsertionPoint = TailMBB->begin(); 2684 while (SelectMBBI != SelectEnd) { 2685 auto Next = std::next(SelectMBBI); 2686 if (isSelectPseudo(*SelectMBBI)) { 2687 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 2688 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 2689 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 2690 .addReg(SelectMBBI->getOperand(4).getReg()) 2691 .addMBB(HeadMBB) 2692 .addReg(SelectMBBI->getOperand(5).getReg()) 2693 .addMBB(IfFalseMBB); 2694 SelectMBBI->eraseFromParent(); 2695 } 2696 SelectMBBI = Next; 2697 } 2698 2699 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 2700 return TailMBB; 2701 } 2702 2703 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 2704 int VLIndex, unsigned SEWIndex, 2705 RISCVVLMUL VLMul, bool WritesElement0) { 2706 MachineFunction &MF = *BB->getParent(); 2707 DebugLoc DL = MI.getDebugLoc(); 2708 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2709 2710 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 2711 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2712 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 2713 2714 MachineRegisterInfo &MRI = MF.getRegInfo(); 2715 2716 // VL and VTYPE are alive here. 2717 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 2718 2719 if (VLIndex >= 0) { 2720 // Set VL (rs1 != X0). 2721 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2722 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 2723 .addReg(MI.getOperand(VLIndex).getReg()); 2724 } else 2725 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 2726 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 2727 .addReg(RISCV::X0, RegState::Kill); 2728 2729 // Default to tail agnostic unless the destination is tied to a source. In 2730 // that case the user would have some control over the tail values. The tail 2731 // policy is also ignored on instructions that only update element 0 like 2732 // vmv.s.x or reductions so use agnostic there to match the common case. 2733 // FIXME: This is conservatively correct, but we might want to detect that 2734 // the input is undefined. 2735 bool TailAgnostic = true; 2736 unsigned UseOpIdx; 2737 if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) { 2738 TailAgnostic = false; 2739 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 2740 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 2741 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); 2742 if (UseMI && UseMI->isImplicitDef()) 2743 TailAgnostic = true; 2744 } 2745 2746 // For simplicity we reuse the vtype representation here. 
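  // The immediate built below packs (vlmul, sew, tail policy, mask policy)
  // in the same way as the vtype CSR; roughly, LMUL=1 with SEW=32, tail
  // agnostic and mask undisturbed corresponds to what an assembly-level
  // "vsetvli rd, rs1, e32,m1,ta,mu" operand string encodes.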
2747 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, 2748 /*TailAgnostic*/ TailAgnostic, 2749 /*MaskAgnostic*/ false)); 2750 2751 // Remove (now) redundant operands from pseudo 2752 MI.getOperand(SEWIndex).setImm(-1); 2753 if (VLIndex >= 0) { 2754 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 2755 MI.getOperand(VLIndex).setIsKill(false); 2756 } 2757 2758 return BB; 2759 } 2760 2761 MachineBasicBlock * 2762 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 2763 MachineBasicBlock *BB) const { 2764 uint64_t TSFlags = MI.getDesc().TSFlags; 2765 2766 if (TSFlags & RISCVII::HasSEWOpMask) { 2767 unsigned NumOperands = MI.getNumExplicitOperands(); 2768 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; 2769 unsigned SEWIndex = NumOperands - 1; 2770 bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask; 2771 2772 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> 2773 RISCVII::VLMulShift); 2774 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0); 2775 } 2776 2777 switch (MI.getOpcode()) { 2778 default: 2779 llvm_unreachable("Unexpected instr type to insert"); 2780 case RISCV::ReadCycleWide: 2781 assert(!Subtarget.is64Bit() && 2782 "ReadCycleWrite is only to be used on riscv32"); 2783 return emitReadCycleWidePseudo(MI, BB); 2784 case RISCV::Select_GPR_Using_CC_GPR: 2785 case RISCV::Select_FPR16_Using_CC_GPR: 2786 case RISCV::Select_FPR32_Using_CC_GPR: 2787 case RISCV::Select_FPR64_Using_CC_GPR: 2788 return emitSelectPseudo(MI, BB); 2789 case RISCV::BuildPairF64Pseudo: 2790 return emitBuildPairF64Pseudo(MI, BB); 2791 case RISCV::SplitF64Pseudo: 2792 return emitSplitF64Pseudo(MI, BB); 2793 } 2794 } 2795 2796 // Calling Convention Implementation. 2797 // The expectations for frontend ABI lowering vary from target to target. 2798 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 2799 // details, but this is a longer term goal. For now, we simply try to keep the 2800 // role of the frontend as simple and well-defined as possible. The rules can 2801 // be summarised as: 2802 // * Never split up large scalar arguments. We handle them here. 2803 // * If a hardfloat calling convention is being used, and the struct may be 2804 // passed in a pair of registers (fp+fp, int+fp), and both registers are 2805 // available, then pass as two separate arguments. If either the GPRs or FPRs 2806 // are exhausted, then pass according to the rule below. 2807 // * If a struct could never be passed in registers or directly in a stack 2808 // slot (as it is larger than 2*XLEN and the floating point rules don't 2809 // apply), then pass it using a pointer with the byval attribute. 2810 // * If a struct is less than 2*XLEN, then coerce to either a two-element 2811 // word-sized array or a 2*XLEN scalar (depending on alignment). 2812 // * The frontend can determine whether a struct is returned by reference or 2813 // not based on its size and fields. If it will be returned by reference, the 2814 // frontend must modify the prototype so a pointer with the sret annotation is 2815 // passed as the first argument. This is not necessary for large scalar 2816 // returns. 2817 // * Struct return values and varargs should be coerced to structs containing 2818 // register-size fields in the same situations they would be for fixed 2819 // arguments. 
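// As an illustration of the hard-float rule above (not exhaustive): under
// lp64d, a 'struct { double d; long l; }' argument is normally passed as two
// separate arguments, one in an FPR and one in a GPR, for as long as both
// kinds of registers remain available; once either register file is
// exhausted, it is passed according to the integer rules instead.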
2820 2821 static const MCPhysReg ArgGPRs[] = { 2822 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 2823 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 2824 }; 2825 static const MCPhysReg ArgFPR16s[] = { 2826 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 2827 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 2828 }; 2829 static const MCPhysReg ArgFPR32s[] = { 2830 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 2831 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 2832 }; 2833 static const MCPhysReg ArgFPR64s[] = { 2834 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 2835 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 2836 }; 2837 // This is an interim calling convention and it may be changed in the future. 2838 static const MCPhysReg ArgVRs[] = { 2839 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 2840 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 2841 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 2842 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 2843 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 2844 RISCV::V20M2, RISCV::V22M2}; 2845 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 2846 RISCV::V20M4}; 2847 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 2848 2849 // Pass a 2*XLEN argument that has been split into two XLEN values through 2850 // registers or the stack as necessary. 2851 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 2852 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 2853 MVT ValVT2, MVT LocVT2, 2854 ISD::ArgFlagsTy ArgFlags2) { 2855 unsigned XLenInBytes = XLen / 8; 2856 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2857 // At least one half can be passed via register. 2858 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 2859 VA1.getLocVT(), CCValAssign::Full)); 2860 } else { 2861 // Both halves must be passed on the stack, with proper alignment. 2862 Align StackAlign = 2863 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 2864 State.addLoc( 2865 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 2866 State.AllocateStack(XLenInBytes, StackAlign), 2867 VA1.getLocVT(), CCValAssign::Full)); 2868 State.addLoc(CCValAssign::getMem( 2869 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2870 LocVT2, CCValAssign::Full)); 2871 return false; 2872 } 2873 2874 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2875 // The second half can also be passed via register. 2876 State.addLoc( 2877 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 2878 } else { 2879 // The second half is passed via the stack, without additional alignment. 2880 State.addLoc(CCValAssign::getMem( 2881 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2882 LocVT2, CCValAssign::Full)); 2883 } 2884 2885 return false; 2886 } 2887 2888 // Implements the RISC-V calling convention. Returns true upon failure. 2889 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 2890 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 2891 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 2892 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 2893 Optional<unsigned> FirstMaskArgument) { 2894 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 2895 assert(XLen == 32 || XLen == 64); 2896 MVT XLenVT = XLen == 32 ? 
MVT::i32 : MVT::i64; 2897 2898 // Any return value split into more than two values can't be returned 2899 // directly. 2900 if (IsRet && ValNo > 1) 2901 return true; 2902 2903 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing a 2904 // variadic argument, or if no F16/F32 argument registers are available. 2905 bool UseGPRForF16_F32 = true; 2906 // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 2907 // variadic argument, or if no F64 argument registers are available. 2908 bool UseGPRForF64 = true; 2909 2910 switch (ABI) { 2911 default: 2912 llvm_unreachable("Unexpected ABI"); 2913 case RISCVABI::ABI_ILP32: 2914 case RISCVABI::ABI_LP64: 2915 break; 2916 case RISCVABI::ABI_ILP32F: 2917 case RISCVABI::ABI_LP64F: 2918 UseGPRForF16_F32 = !IsFixed; 2919 break; 2920 case RISCVABI::ABI_ILP32D: 2921 case RISCVABI::ABI_LP64D: 2922 UseGPRForF16_F32 = !IsFixed; 2923 UseGPRForF64 = !IsFixed; 2924 break; 2925 } 2926 2927 // FPR16, FPR32, and FPR64 alias each other. 2928 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 2929 UseGPRForF16_F32 = true; 2930 UseGPRForF64 = true; 2931 } 2932 2933 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 2934 // similar local variables rather than directly checking against the target 2935 // ABI. 2936 2937 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 2938 LocVT = XLenVT; 2939 LocInfo = CCValAssign::BCvt; 2940 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 2941 LocVT = MVT::i64; 2942 LocInfo = CCValAssign::BCvt; 2943 } 2944 2945 // If this is a variadic argument, the RISC-V calling convention requires 2946 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 2947 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 2948 // be used regardless of whether the original argument was split during 2949 // legalisation or not. The argument will not be passed by registers if the 2950 // original type is larger than 2*XLEN, so the register alignment rule does 2951 // not apply. 2952 unsigned TwoXLenInBytes = (2 * XLen) / 8; 2953 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 2954 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 2955 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 2956 // Skip 'odd' register if necessary. 2957 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 2958 State.AllocateReg(ArgGPRs); 2959 } 2960 2961 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 2962 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 2963 State.getPendingArgFlags(); 2964 2965 assert(PendingLocs.size() == PendingArgFlags.size() && 2966 "PendingLocs and PendingArgFlags out of sync"); 2967 2968 // Handle passing f64 on RV32D with a soft float ABI or when floating point 2969 // registers are exhausted. 2970 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 2971 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 2972 "Can't lower f64 if it is split"); 2973 // Depending on available argument GPRs, f64 may be passed in a pair of 2974 // GPRs, split between a GPR and the stack, or passed completely on the 2975 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 2976 // cases.
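    // For example, if two argument GPRs are still free the f64 travels in
    // that register pair; if exactly one is free, one half goes in that GPR
    // and the other half on the stack; with no GPRs left the whole value is
    // passed on the stack.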
2977 Register Reg = State.AllocateReg(ArgGPRs); 2978 LocVT = MVT::i32; 2979 if (!Reg) { 2980 unsigned StackOffset = State.AllocateStack(8, Align(8)); 2981 State.addLoc( 2982 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 2983 return false; 2984 } 2985 if (!State.AllocateReg(ArgGPRs)) 2986 State.AllocateStack(4, Align(4)); 2987 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2988 return false; 2989 } 2990 2991 // Split arguments might be passed indirectly, so keep track of the pending 2992 // values. 2993 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 2994 LocVT = XLenVT; 2995 LocInfo = CCValAssign::Indirect; 2996 PendingLocs.push_back( 2997 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 2998 PendingArgFlags.push_back(ArgFlags); 2999 if (!ArgFlags.isSplitEnd()) { 3000 return false; 3001 } 3002 } 3003 3004 // If the split argument only had two elements, it should be passed directly 3005 // in registers or on the stack. 3006 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 3007 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 3008 // Apply the normal calling convention rules to the first half of the 3009 // split argument. 3010 CCValAssign VA = PendingLocs[0]; 3011 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 3012 PendingLocs.clear(); 3013 PendingArgFlags.clear(); 3014 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 3015 ArgFlags); 3016 } 3017 3018 // Allocate to a register if possible, or else a stack slot. 3019 Register Reg; 3020 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 3021 Reg = State.AllocateReg(ArgFPR16s); 3022 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 3023 Reg = State.AllocateReg(ArgFPR32s); 3024 else if (ValVT == MVT::f64 && !UseGPRForF64) 3025 Reg = State.AllocateReg(ArgFPR64s); 3026 else if (ValVT.isScalableVector()) { 3027 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 3028 if (RC == &RISCV::VRRegClass) { 3029 // Assign the first mask argument to V0. 3030 // This is an interim calling convention and it may be changed in the 3031 // future. 3032 if (FirstMaskArgument.hasValue() && 3033 ValNo == FirstMaskArgument.getValue()) { 3034 Reg = State.AllocateReg(RISCV::V0); 3035 } else { 3036 Reg = State.AllocateReg(ArgVRs); 3037 } 3038 } else if (RC == &RISCV::VRM2RegClass) { 3039 Reg = State.AllocateReg(ArgVRM2s); 3040 } else if (RC == &RISCV::VRM4RegClass) { 3041 Reg = State.AllocateReg(ArgVRM4s); 3042 } else if (RC == &RISCV::VRM8RegClass) { 3043 Reg = State.AllocateReg(ArgVRM8s); 3044 } else { 3045 llvm_unreachable("Unhandled class register for ValueType"); 3046 } 3047 if (!Reg) { 3048 LocInfo = CCValAssign::Indirect; 3049 // Try using a GPR to pass the address 3050 Reg = State.AllocateReg(ArgGPRs); 3051 LocVT = XLenVT; 3052 } 3053 } else 3054 Reg = State.AllocateReg(ArgGPRs); 3055 unsigned StackOffset = 3056 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 3057 3058 // If we reach this point and PendingLocs is non-empty, we must be at the 3059 // end of a split argument that must be passed indirectly. 
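  // For example, an i128 argument on RV32 (or i256 on RV64) is wider than
  // 2*XLEN and is legalised into more than two parts, so every pending part
  // is given the same location: the GPR (or stack slot) that carries the
  // address of the in-memory copy.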
3060 if (!PendingLocs.empty()) { 3061 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 3062 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 3063 3064 for (auto &It : PendingLocs) { 3065 if (Reg) 3066 It.convertToReg(Reg); 3067 else 3068 It.convertToMem(StackOffset); 3069 State.addLoc(It); 3070 } 3071 PendingLocs.clear(); 3072 PendingArgFlags.clear(); 3073 return false; 3074 } 3075 3076 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 3077 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 3078 "Expected an XLenVT or scalable vector types at this stage"); 3079 3080 if (Reg) { 3081 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3082 return false; 3083 } 3084 3085 // When a floating-point value is passed on the stack, no bit-conversion is 3086 // needed. 3087 if (ValVT.isFloatingPoint()) { 3088 LocVT = ValVT; 3089 LocInfo = CCValAssign::Full; 3090 } 3091 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 3092 return false; 3093 } 3094 3095 template <typename ArgTy> 3096 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 3097 for (const auto &ArgIdx : enumerate(Args)) { 3098 MVT ArgVT = ArgIdx.value().VT; 3099 if (ArgVT.isScalableVector() && 3100 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 3101 return ArgIdx.index(); 3102 } 3103 return None; 3104 } 3105 3106 void RISCVTargetLowering::analyzeInputArgs( 3107 MachineFunction &MF, CCState &CCInfo, 3108 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 3109 unsigned NumArgs = Ins.size(); 3110 FunctionType *FType = MF.getFunction().getFunctionType(); 3111 3112 Optional<unsigned> FirstMaskArgument; 3113 if (Subtarget.hasStdExtV()) 3114 FirstMaskArgument = preAssignMask(Ins); 3115 3116 for (unsigned i = 0; i != NumArgs; ++i) { 3117 MVT ArgVT = Ins[i].VT; 3118 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 3119 3120 Type *ArgTy = nullptr; 3121 if (IsRet) 3122 ArgTy = FType->getReturnType(); 3123 else if (Ins[i].isOrigArg()) 3124 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 3125 3126 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3127 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3128 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 3129 FirstMaskArgument)) { 3130 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 3131 << EVT(ArgVT).getEVTString() << '\n'); 3132 llvm_unreachable(nullptr); 3133 } 3134 } 3135 } 3136 3137 void RISCVTargetLowering::analyzeOutputArgs( 3138 MachineFunction &MF, CCState &CCInfo, 3139 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 3140 CallLoweringInfo *CLI) const { 3141 unsigned NumArgs = Outs.size(); 3142 3143 Optional<unsigned> FirstMaskArgument; 3144 if (Subtarget.hasStdExtV()) 3145 FirstMaskArgument = preAssignMask(Outs); 3146 3147 for (unsigned i = 0; i != NumArgs; i++) { 3148 MVT ArgVT = Outs[i].VT; 3149 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3150 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 3151 3152 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3153 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 3154 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 3155 FirstMaskArgument)) { 3156 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 3157 << EVT(ArgVT).getEVTString() << "\n"); 3158 llvm_unreachable(nullptr); 3159 } 3160 } 3161 } 3162 3163 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 3164 // values. 3165 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 3166 const CCValAssign &VA, const SDLoc &DL) { 3167 switch (VA.getLocInfo()) { 3168 default: 3169 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3170 case CCValAssign::Full: 3171 break; 3172 case CCValAssign::BCvt: 3173 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3174 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 3175 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3176 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 3177 else 3178 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 3179 break; 3180 } 3181 return Val; 3182 } 3183 3184 // The caller is responsible for loading the full value if the argument is 3185 // passed with CCValAssign::Indirect. 3186 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 3187 const CCValAssign &VA, const SDLoc &DL, 3188 const RISCVTargetLowering &TLI) { 3189 MachineFunction &MF = DAG.getMachineFunction(); 3190 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3191 EVT LocVT = VA.getLocVT(); 3192 SDValue Val; 3193 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 3194 Register VReg = RegInfo.createVirtualRegister(RC); 3195 RegInfo.addLiveIn(VA.getLocReg(), VReg); 3196 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 3197 3198 if (VA.getLocInfo() == CCValAssign::Indirect) 3199 return Val; 3200 3201 return convertLocVTToValVT(DAG, Val, VA, DL); 3202 } 3203 3204 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 3205 const CCValAssign &VA, const SDLoc &DL) { 3206 EVT LocVT = VA.getLocVT(); 3207 3208 switch (VA.getLocInfo()) { 3209 default: 3210 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 3211 case CCValAssign::Full: 3212 break; 3213 case CCValAssign::BCvt: 3214 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 3215 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 3216 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 3217 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 3218 else 3219 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 3220 break; 3221 } 3222 return Val; 3223 } 3224 3225 // The caller is responsible for loading the full value if the argument is 3226 // passed with CCValAssign::Indirect. 
3227 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3228                                 const CCValAssign &VA, const SDLoc &DL) {
3229   MachineFunction &MF = DAG.getMachineFunction();
3230   MachineFrameInfo &MFI = MF.getFrameInfo();
3231   EVT LocVT = VA.getLocVT();
3232   EVT ValVT = VA.getValVT();
3233   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3234   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3235                                  VA.getLocMemOffset(), /*Immutable=*/true);
3236   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3237   SDValue Val;
3238
3239   ISD::LoadExtType ExtType;
3240   switch (VA.getLocInfo()) {
3241   default:
3242     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3243   case CCValAssign::Full:
3244   case CCValAssign::Indirect:
3245   case CCValAssign::BCvt:
3246     ExtType = ISD::NON_EXTLOAD;
3247     break;
3248   }
3249   Val = DAG.getExtLoad(
3250       ExtType, DL, LocVT, Chain, FIN,
3251       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3252   return Val;
3253 }
3254
3255 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3256                                        const CCValAssign &VA, const SDLoc &DL) {
3257   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3258          "Unexpected VA");
3259   MachineFunction &MF = DAG.getMachineFunction();
3260   MachineFrameInfo &MFI = MF.getFrameInfo();
3261   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3262
3263   if (VA.isMemLoc()) {
3264     // f64 is passed on the stack.
3265     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3266     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3267     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3268                        MachinePointerInfo::getFixedStack(MF, FI));
3269   }
3270
3271   assert(VA.isRegLoc() && "Expected register VA assignment");
3272
3273   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3274   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3275   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3276   SDValue Hi;
3277   if (VA.getLocReg() == RISCV::X17) {
3278     // Second half of f64 is passed on the stack.
3279     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3280     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3281     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3282                      MachinePointerInfo::getFixedStack(MF, FI));
3283   } else {
3284     // Second half of f64 is passed in another GPR.
3285     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3286     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3287     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3288   }
3289   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3290 }
3291
3292 // FastCC gives less than a 1% performance improvement on some particular
3293 // benchmarks, but it may theoretically benefit other cases.
3294 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3295                             CCValAssign::LocInfo LocInfo,
3296                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3297
3298   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3299     // X5 and X6 might be used for save-restore libcall.
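    // The remaining caller-saved temporaries (t2 and t3-t6) are therefore
    // appended after a0-a7 so FastCC can keep more scalar arguments in
    // registers.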
3300 static const MCPhysReg GPRList[] = { 3301 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 3302 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 3303 RISCV::X29, RISCV::X30, RISCV::X31}; 3304 if (unsigned Reg = State.AllocateReg(GPRList)) { 3305 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3306 return false; 3307 } 3308 } 3309 3310 if (LocVT == MVT::f16) { 3311 static const MCPhysReg FPR16List[] = { 3312 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 3313 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 3314 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 3315 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 3316 if (unsigned Reg = State.AllocateReg(FPR16List)) { 3317 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3318 return false; 3319 } 3320 } 3321 3322 if (LocVT == MVT::f32) { 3323 static const MCPhysReg FPR32List[] = { 3324 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 3325 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 3326 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 3327 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 3328 if (unsigned Reg = State.AllocateReg(FPR32List)) { 3329 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3330 return false; 3331 } 3332 } 3333 3334 if (LocVT == MVT::f64) { 3335 static const MCPhysReg FPR64List[] = { 3336 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 3337 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 3338 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 3339 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 3340 if (unsigned Reg = State.AllocateReg(FPR64List)) { 3341 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3342 return false; 3343 } 3344 } 3345 3346 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 3347 unsigned Offset4 = State.AllocateStack(4, Align(4)); 3348 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 3349 return false; 3350 } 3351 3352 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 3353 unsigned Offset5 = State.AllocateStack(8, Align(8)); 3354 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 3355 return false; 3356 } 3357 3358 return true; // CC didn't match. 3359 } 3360 3361 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 3362 CCValAssign::LocInfo LocInfo, 3363 ISD::ArgFlagsTy ArgFlags, CCState &State) { 3364 3365 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 3366 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 3367 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 3368 static const MCPhysReg GPRList[] = { 3369 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 3370 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 3371 if (unsigned Reg = State.AllocateReg(GPRList)) { 3372 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 3373 return false; 3374 } 3375 } 3376 3377 if (LocVT == MVT::f32) { 3378 // Pass in STG registers: F1, ..., F6 3379 // fs0 ... 
fs5
3380     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3381                                           RISCV::F18_F, RISCV::F19_F,
3382                                           RISCV::F20_F, RISCV::F21_F};
3383     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3384       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3385       return false;
3386     }
3387   }
3388
3389   if (LocVT == MVT::f64) {
3390     // Pass in STG registers: D1, ..., D6
3391     // fs6 ... fs11
3392     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3393                                           RISCV::F24_D, RISCV::F25_D,
3394                                           RISCV::F26_D, RISCV::F27_D};
3395     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3396       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3397       return false;
3398     }
3399   }
3400
3401   report_fatal_error("No registers left in GHC calling convention");
3402   return true;
3403 }
3404
3405 // Transform physical registers into virtual registers.
3406 SDValue RISCVTargetLowering::LowerFormalArguments(
3407     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3408     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3409     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3410
3411   MachineFunction &MF = DAG.getMachineFunction();
3412
3413   switch (CallConv) {
3414   default:
3415     report_fatal_error("Unsupported calling convention");
3416   case CallingConv::C:
3417   case CallingConv::Fast:
3418     break;
3419   case CallingConv::GHC:
3420     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3421         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3422       report_fatal_error(
3423           "GHC calling convention requires the F and D instruction set extensions");
3424   }
3425
3426   const Function &Func = MF.getFunction();
3427   if (Func.hasFnAttribute("interrupt")) {
3428     if (!Func.arg_empty())
3429       report_fatal_error(
3430           "Functions with the interrupt attribute cannot have arguments!");
3431
3432     StringRef Kind =
3433         MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3434
3435     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3436       report_fatal_error(
3437           "Function interrupt attribute argument not supported!");
3438   }
3439
3440   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441   MVT XLenVT = Subtarget.getXLenVT();
3442   unsigned XLenInBytes = Subtarget.getXLen() / 8;
3443   // Used with varargs to accumulate store chains.
3444   std::vector<SDValue> OutChains;
3445
3446   // Assign locations to all of the incoming arguments.
3447   SmallVector<CCValAssign, 16> ArgLocs;
3448   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3449
3450   if (CallConv == CallingConv::Fast)
3451     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3452   else if (CallConv == CallingConv::GHC)
3453     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3454   else
3455     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3456
3457   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3458     CCValAssign &VA = ArgLocs[i];
3459     SDValue ArgValue;
3460     // Passing f64 on RV32D with a soft float ABI must be handled as a special
3461     // case.
3462     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3463       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3464     else if (VA.isRegLoc())
3465       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3466     else
3467       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3468
3469     if (VA.getLocInfo() == CCValAssign::Indirect) {
3470       // If the original argument was split and passed by reference (e.g. i128
3471       // on RV32), we need to load all parts of it here (using the same
3472       // address).
3473       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3474                                    MachinePointerInfo()));
3475       unsigned ArgIndex = Ins[i].OrigArgIndex;
3476       assert(Ins[i].PartOffset == 0);
3477       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3478         CCValAssign &PartVA = ArgLocs[i + 1];
3479         unsigned PartOffset = Ins[i + 1].PartOffset;
3480         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3481                                       DAG.getIntPtrConstant(PartOffset, DL));
3482         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3483                                      MachinePointerInfo()));
3484         ++i;
3485       }
3486       continue;
3487     }
3488     InVals.push_back(ArgValue);
3489   }
3490
3491   if (IsVarArg) {
3492     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3493     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3494     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3495     MachineFrameInfo &MFI = MF.getFrameInfo();
3496     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3497     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3498
3499     // Offset of the first variable argument from stack pointer, and size of
3500     // the vararg save area. For now, the varargs save area is either zero or
3501     // large enough to hold a0-a7.
3502     int VaArgOffset, VarArgsSaveSize;
3503
3504     // If all registers are allocated, then all varargs must be passed on the
3505     // stack and we don't need to save any argregs.
3506     if (ArgRegs.size() == Idx) {
3507       VaArgOffset = CCInfo.getNextStackOffset();
3508       VarArgsSaveSize = 0;
3509     } else {
3510       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
3511       VaArgOffset = -VarArgsSaveSize;
3512     }
3513
3514     // Record the frame index of the first variable argument,
3515     // which is needed by VASTART.
3516     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3517     RVFI->setVarArgsFrameIndex(FI);
3518
3519     // If saving an odd number of registers then create an extra stack slot to
3520     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
3521     // offsets to even-numbered registers remain 2*XLEN-aligned.
3522     if (Idx % 2) {
3523       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
3524       VarArgsSaveSize += XLenInBytes;
3525     }
3526
3527     // Copy the integer registers that may have been used for passing varargs
3528     // to the vararg save area.
3529     for (unsigned I = Idx; I < ArgRegs.size();
3530          ++I, VaArgOffset += XLenInBytes) {
3531       const Register Reg = RegInfo.createVirtualRegister(RC);
3532       RegInfo.addLiveIn(ArgRegs[I], Reg);
3533       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
3534       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3535       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3536       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3537                                    MachinePointerInfo::getFixedStack(MF, FI));
3538       cast<StoreSDNode>(Store.getNode())
3539           ->getMemOperand()
3540           ->setValue((Value *)nullptr);
3541       OutChains.push_back(Store);
3542     }
3543     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3544   }
3545
3546   // All stores are grouped in one node to allow the matching between
3547   // the size of Ins and InVals. This only happens for vararg functions.
3548   if (!OutChains.empty()) {
3549     OutChains.push_back(Chain);
3550     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3551   }
3552
3553   return Chain;
3554 }
3555
3556 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3557 /// for tail call optimization.
3558 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 3559 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 3560 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 3561 const SmallVector<CCValAssign, 16> &ArgLocs) const { 3562 3563 auto &Callee = CLI.Callee; 3564 auto CalleeCC = CLI.CallConv; 3565 auto &Outs = CLI.Outs; 3566 auto &Caller = MF.getFunction(); 3567 auto CallerCC = Caller.getCallingConv(); 3568 3569 // Exception-handling functions need a special set of instructions to 3570 // indicate a return to the hardware. Tail-calling another function would 3571 // probably break this. 3572 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 3573 // should be expanded as new function attributes are introduced. 3574 if (Caller.hasFnAttribute("interrupt")) 3575 return false; 3576 3577 // Do not tail call opt if the stack is used to pass parameters. 3578 if (CCInfo.getNextStackOffset() != 0) 3579 return false; 3580 3581 // Do not tail call opt if any parameters need to be passed indirectly. 3582 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 3583 // passed indirectly. So the address of the value will be passed in a 3584 // register, or if not available, then the address is put on the stack. In 3585 // order to pass indirectly, space on the stack often needs to be allocated 3586 // in order to store the value. In this case the CCInfo.getNextStackOffset() 3587 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 3588 // are passed CCValAssign::Indirect. 3589 for (auto &VA : ArgLocs) 3590 if (VA.getLocInfo() == CCValAssign::Indirect) 3591 return false; 3592 3593 // Do not tail call opt if either caller or callee uses struct return 3594 // semantics. 3595 auto IsCallerStructRet = Caller.hasStructRetAttr(); 3596 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 3597 if (IsCallerStructRet || IsCalleeStructRet) 3598 return false; 3599 3600 // Externally-defined functions with weak linkage should not be 3601 // tail-called. The behaviour of branch instructions in this situation (as 3602 // used for tail calls) is implementation-defined, so we cannot rely on the 3603 // linker replacing the tail call with a return. 3604 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3605 const GlobalValue *GV = G->getGlobal(); 3606 if (GV->hasExternalWeakLinkage()) 3607 return false; 3608 } 3609 3610 // The callee has to preserve all registers the caller needs to preserve. 3611 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3612 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 3613 if (CalleeCC != CallerCC) { 3614 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 3615 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 3616 return false; 3617 } 3618 3619 // Byval parameters hand the function a pointer directly into the stack area 3620 // we want to reuse during a tail call. Working around this *is* possible 3621 // but less efficient and uglier in LowerCall. 3622 for (auto &Arg : Outs) 3623 if (Arg.Flags.isByVal()) 3624 return false; 3625 3626 return true; 3627 } 3628 3629 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 3630 // and output parameter nodes. 
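// The overall flow is: analyse the operands with the selected calling
// convention, create local copies for byval arguments, open the frame with
// CALLSEQ_START (unless tail calling), copy register arguments and emit the
// stack stores, glue the register copies to a CALL or TAIL node, close the
// frame with CALLSEQ_END, and finally copy the results out of their return
// registers.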
3631 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 3632 SmallVectorImpl<SDValue> &InVals) const { 3633 SelectionDAG &DAG = CLI.DAG; 3634 SDLoc &DL = CLI.DL; 3635 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3636 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3637 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3638 SDValue Chain = CLI.Chain; 3639 SDValue Callee = CLI.Callee; 3640 bool &IsTailCall = CLI.IsTailCall; 3641 CallingConv::ID CallConv = CLI.CallConv; 3642 bool IsVarArg = CLI.IsVarArg; 3643 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3644 MVT XLenVT = Subtarget.getXLenVT(); 3645 3646 MachineFunction &MF = DAG.getMachineFunction(); 3647 3648 // Analyze the operands of the call, assigning locations to each operand. 3649 SmallVector<CCValAssign, 16> ArgLocs; 3650 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 3651 3652 if (CallConv == CallingConv::Fast) 3653 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 3654 else if (CallConv == CallingConv::GHC) 3655 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 3656 else 3657 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 3658 3659 // Check if it's really possible to do a tail call. 3660 if (IsTailCall) 3661 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 3662 3663 if (IsTailCall) 3664 ++NumTailCalls; 3665 else if (CLI.CB && CLI.CB->isMustTailCall()) 3666 report_fatal_error("failed to perform tail call elimination on a call " 3667 "site marked musttail"); 3668 3669 // Get a count of how many bytes are to be pushed on the stack. 3670 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 3671 3672 // Create local copies for byval args 3673 SmallVector<SDValue, 8> ByValArgs; 3674 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3675 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3676 if (!Flags.isByVal()) 3677 continue; 3678 3679 SDValue Arg = OutVals[i]; 3680 unsigned Size = Flags.getByValSize(); 3681 Align Alignment = Flags.getNonZeroByValAlign(); 3682 3683 int FI = 3684 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 3685 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 3686 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 3687 3688 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 3689 /*IsVolatile=*/false, 3690 /*AlwaysInline=*/false, IsTailCall, 3691 MachinePointerInfo(), MachinePointerInfo()); 3692 ByValArgs.push_back(FIPtr); 3693 } 3694 3695 if (!IsTailCall) 3696 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 3697 3698 // Copy argument values to their designated locations. 3699 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 3700 SmallVector<SDValue, 8> MemOpChains; 3701 SDValue StackPtr; 3702 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 3703 CCValAssign &VA = ArgLocs[i]; 3704 SDValue ArgValue = OutVals[i]; 3705 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3706 3707 // Handle passing f64 on RV32D with a soft float ABI as a special case. 3708 bool IsF64OnRV32DSoftABI = 3709 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 3710 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 3711 SDValue SplitF64 = DAG.getNode( 3712 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 3713 SDValue Lo = SplitF64.getValue(0); 3714 SDValue Hi = SplitF64.getValue(1); 3715 3716 Register RegLo = VA.getLocReg(); 3717 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 3718 3719 if (RegLo == RISCV::X17) { 3720 // Second half of f64 is passed on the stack. 
3721 // Work out the address of the stack slot. 3722 if (!StackPtr.getNode()) 3723 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3724 // Emit the store. 3725 MemOpChains.push_back( 3726 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 3727 } else { 3728 // Second half of f64 is passed in another GPR. 3729 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3730 Register RegHigh = RegLo + 1; 3731 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 3732 } 3733 continue; 3734 } 3735 3736 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 3737 // as any other MemLoc. 3738 3739 // Promote the value if needed. 3740 // For now, only handle fully promoted and indirect arguments. 3741 if (VA.getLocInfo() == CCValAssign::Indirect) { 3742 // Store the argument in a stack slot and pass its address. 3743 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 3744 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 3745 MemOpChains.push_back( 3746 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 3747 MachinePointerInfo::getFixedStack(MF, FI))); 3748 // If the original argument was split (e.g. i128), we need 3749 // to store all parts of it here (and pass just one address). 3750 unsigned ArgIndex = Outs[i].OrigArgIndex; 3751 assert(Outs[i].PartOffset == 0); 3752 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 3753 SDValue PartValue = OutVals[i + 1]; 3754 unsigned PartOffset = Outs[i + 1].PartOffset; 3755 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 3756 DAG.getIntPtrConstant(PartOffset, DL)); 3757 MemOpChains.push_back( 3758 DAG.getStore(Chain, DL, PartValue, Address, 3759 MachinePointerInfo::getFixedStack(MF, FI))); 3760 ++i; 3761 } 3762 ArgValue = SpillSlot; 3763 } else { 3764 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 3765 } 3766 3767 // Use local copy if it is a byval arg. 3768 if (Flags.isByVal()) 3769 ArgValue = ByValArgs[j++]; 3770 3771 if (VA.isRegLoc()) { 3772 // Queue up the argument copies and emit them at the end. 3773 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 3774 } else { 3775 assert(VA.isMemLoc() && "Argument not register or memory"); 3776 assert(!IsTailCall && "Tail call not allowed if stack is used " 3777 "for passing parameters"); 3778 3779 // Work out the address of the stack slot. 3780 if (!StackPtr.getNode()) 3781 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3782 SDValue Address = 3783 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 3784 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 3785 3786 // Emit the store. 3787 MemOpChains.push_back( 3788 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 3789 } 3790 } 3791 3792 // Join the stores, which are independent of one another. 3793 if (!MemOpChains.empty()) 3794 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 3795 3796 SDValue Glue; 3797 3798 // Build a sequence of copy-to-reg nodes, chained and glued together. 3799 for (auto &Reg : RegsToPass) { 3800 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 3801 Glue = Chain.getValue(1); 3802 } 3803 3804 // Validate that none of the argument registers have been marked as 3805 // reserved, if so report an error. Do the same for the return address if this 3806 // is not a tailcall. 
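  // A non-tail call must be able to clobber X1 (ra) with its return address,
  // so a user-reserved ra can only be honoured for tail calls.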
3807 validateCCReservedRegs(RegsToPass, MF); 3808 if (!IsTailCall && 3809 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 3810 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3811 MF.getFunction(), 3812 "Return address register required, but has been reserved."}); 3813 3814 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 3815 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 3816 // split it and then direct call can be matched by PseudoCALL. 3817 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 3818 const GlobalValue *GV = S->getGlobal(); 3819 3820 unsigned OpFlags = RISCVII::MO_CALL; 3821 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 3822 OpFlags = RISCVII::MO_PLT; 3823 3824 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 3825 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3826 unsigned OpFlags = RISCVII::MO_CALL; 3827 3828 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 3829 nullptr)) 3830 OpFlags = RISCVII::MO_PLT; 3831 3832 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 3833 } 3834 3835 // The first call operand is the chain and the second is the target address. 3836 SmallVector<SDValue, 8> Ops; 3837 Ops.push_back(Chain); 3838 Ops.push_back(Callee); 3839 3840 // Add argument registers to the end of the list so that they are 3841 // known live into the call. 3842 for (auto &Reg : RegsToPass) 3843 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 3844 3845 if (!IsTailCall) { 3846 // Add a register mask operand representing the call-preserved registers. 3847 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3848 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 3849 assert(Mask && "Missing call preserved mask for calling convention"); 3850 Ops.push_back(DAG.getRegisterMask(Mask)); 3851 } 3852 3853 // Glue the call to the argument copies, if any. 3854 if (Glue.getNode()) 3855 Ops.push_back(Glue); 3856 3857 // Emit the call. 3858 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3859 3860 if (IsTailCall) { 3861 MF.getFrameInfo().setHasTailCall(); 3862 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 3863 } 3864 3865 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 3866 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 3867 Glue = Chain.getValue(1); 3868 3869 // Mark the end of the call, which is glued to the call itself. 3870 Chain = DAG.getCALLSEQ_END(Chain, 3871 DAG.getConstant(NumBytes, DL, PtrVT, true), 3872 DAG.getConstant(0, DL, PtrVT, true), 3873 Glue, DL); 3874 Glue = Chain.getValue(1); 3875 3876 // Assign locations to each value returned by this call. 3877 SmallVector<CCValAssign, 16> RVLocs; 3878 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 3879 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 3880 3881 // Copy all of the result registers out of their specified physreg. 
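  // Note that an f64 returned under an RV32 soft-float ABI arrives as two i32
  // halves in a0/a1 and is reassembled below with BuildPairF64.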
3882 for (auto &VA : RVLocs) { 3883 // Copy the value out 3884 SDValue RetValue = 3885 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 3886 // Glue the RetValue to the end of the call sequence 3887 Chain = RetValue.getValue(1); 3888 Glue = RetValue.getValue(2); 3889 3890 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3891 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 3892 SDValue RetValue2 = 3893 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 3894 Chain = RetValue2.getValue(1); 3895 Glue = RetValue2.getValue(2); 3896 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 3897 RetValue2); 3898 } 3899 3900 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 3901 3902 InVals.push_back(RetValue); 3903 } 3904 3905 return Chain; 3906 } 3907 3908 bool RISCVTargetLowering::CanLowerReturn( 3909 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 3910 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 3911 SmallVector<CCValAssign, 16> RVLocs; 3912 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 3913 3914 Optional<unsigned> FirstMaskArgument; 3915 if (Subtarget.hasStdExtV()) 3916 FirstMaskArgument = preAssignMask(Outs); 3917 3918 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3919 MVT VT = Outs[i].VT; 3920 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3921 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3922 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 3923 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 3924 *this, FirstMaskArgument)) 3925 return false; 3926 } 3927 return true; 3928 } 3929 3930 SDValue 3931 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 3932 bool IsVarArg, 3933 const SmallVectorImpl<ISD::OutputArg> &Outs, 3934 const SmallVectorImpl<SDValue> &OutVals, 3935 const SDLoc &DL, SelectionDAG &DAG) const { 3936 const MachineFunction &MF = DAG.getMachineFunction(); 3937 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 3938 3939 // Stores the assignment of the return value to a location. 3940 SmallVector<CCValAssign, 16> RVLocs; 3941 3942 // Info about the registers and stack slot. 3943 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 3944 *DAG.getContext()); 3945 3946 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 3947 nullptr); 3948 3949 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 3950 report_fatal_error("GHC functions return void only"); 3951 3952 SDValue Glue; 3953 SmallVector<SDValue, 4> RetOps(1, Chain); 3954 3955 // Copy the result values into the output registers. 3956 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 3957 SDValue Val = OutVals[i]; 3958 CCValAssign &VA = RVLocs[i]; 3959 assert(VA.isRegLoc() && "Can only return in registers!"); 3960 3961 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3962 // Handle returning f64 on RV32D with a soft float ABI. 
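      // The double is split with SplitF64 and its two i32 halves are returned
      // in a pair of adjacent GPRs (low half in VA's register, high half in
      // the next one).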
3963 assert(VA.isRegLoc() && "Expected return via registers"); 3964 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 3965 DAG.getVTList(MVT::i32, MVT::i32), Val); 3966 SDValue Lo = SplitF64.getValue(0); 3967 SDValue Hi = SplitF64.getValue(1); 3968 Register RegLo = VA.getLocReg(); 3969 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3970 Register RegHi = RegLo + 1; 3971 3972 if (STI.isRegisterReservedByUser(RegLo) || 3973 STI.isRegisterReservedByUser(RegHi)) 3974 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3975 MF.getFunction(), 3976 "Return value register required, but has been reserved."}); 3977 3978 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 3979 Glue = Chain.getValue(1); 3980 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 3981 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 3982 Glue = Chain.getValue(1); 3983 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 3984 } else { 3985 // Handle a 'normal' return. 3986 Val = convertValVTToLocVT(DAG, Val, VA, DL); 3987 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 3988 3989 if (STI.isRegisterReservedByUser(VA.getLocReg())) 3990 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3991 MF.getFunction(), 3992 "Return value register required, but has been reserved."}); 3993 3994 // Guarantee that all emitted copies are stuck together. 3995 Glue = Chain.getValue(1); 3996 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 3997 } 3998 } 3999 4000 RetOps[0] = Chain; // Update chain. 4001 4002 // Add the glue node if we have it. 4003 if (Glue.getNode()) { 4004 RetOps.push_back(Glue); 4005 } 4006 4007 // Interrupt service routines use different return instructions. 4008 const Function &Func = DAG.getMachineFunction().getFunction(); 4009 if (Func.hasFnAttribute("interrupt")) { 4010 if (!Func.getReturnType()->isVoidTy()) 4011 report_fatal_error( 4012 "Functions with the interrupt attribute must have void return type!"); 4013 4014 MachineFunction &MF = DAG.getMachineFunction(); 4015 StringRef Kind = 4016 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 4017 4018 unsigned RetOpc; 4019 if (Kind == "user") 4020 RetOpc = RISCVISD::URET_FLAG; 4021 else if (Kind == "supervisor") 4022 RetOpc = RISCVISD::SRET_FLAG; 4023 else 4024 RetOpc = RISCVISD::MRET_FLAG; 4025 4026 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 4027 } 4028 4029 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 4030 } 4031 4032 void RISCVTargetLowering::validateCCReservedRegs( 4033 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 4034 MachineFunction &MF) const { 4035 const Function &F = MF.getFunction(); 4036 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 4037 4038 if (llvm::any_of(Regs, [&STI](auto Reg) { 4039 return STI.isRegisterReservedByUser(Reg.first); 4040 })) 4041 F.getContext().diagnose(DiagnosticInfoUnsupported{ 4042 F, "Argument register required, but has been reserved."}); 4043 } 4044 4045 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 4046 return CI->isTailCall(); 4047 } 4048 4049 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 4050 #define NODE_NAME_CASE(NODE) \ 4051 case RISCVISD::NODE: \ 4052 return "RISCVISD::" #NODE; 4053 // clang-format off 4054 switch ((RISCVISD::NodeType)Opcode) { 4055 case RISCVISD::FIRST_NUMBER: 4056 break; 4057 NODE_NAME_CASE(RET_FLAG) 4058 NODE_NAME_CASE(URET_FLAG) 4059 NODE_NAME_CASE(SRET_FLAG) 4060 
NODE_NAME_CASE(MRET_FLAG) 4061 NODE_NAME_CASE(CALL) 4062 NODE_NAME_CASE(SELECT_CC) 4063 NODE_NAME_CASE(BuildPairF64) 4064 NODE_NAME_CASE(SplitF64) 4065 NODE_NAME_CASE(TAIL) 4066 NODE_NAME_CASE(SLLW) 4067 NODE_NAME_CASE(SRAW) 4068 NODE_NAME_CASE(SRLW) 4069 NODE_NAME_CASE(DIVW) 4070 NODE_NAME_CASE(DIVUW) 4071 NODE_NAME_CASE(REMUW) 4072 NODE_NAME_CASE(ROLW) 4073 NODE_NAME_CASE(RORW) 4074 NODE_NAME_CASE(FSLW) 4075 NODE_NAME_CASE(FSRW) 4076 NODE_NAME_CASE(FMV_H_X) 4077 NODE_NAME_CASE(FMV_X_ANYEXTH) 4078 NODE_NAME_CASE(FMV_W_X_RV64) 4079 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 4080 NODE_NAME_CASE(READ_CYCLE_WIDE) 4081 NODE_NAME_CASE(GREVI) 4082 NODE_NAME_CASE(GREVIW) 4083 NODE_NAME_CASE(GORCI) 4084 NODE_NAME_CASE(GORCIW) 4085 NODE_NAME_CASE(VMV_X_S) 4086 NODE_NAME_CASE(SPLAT_VECTOR_I64) 4087 NODE_NAME_CASE(READ_VLENB) 4088 NODE_NAME_CASE(TRUNCATE_VECTOR) 4089 NODE_NAME_CASE(VLEFF) 4090 NODE_NAME_CASE(VLEFF_MASK) 4091 NODE_NAME_CASE(VLSEGFF) 4092 NODE_NAME_CASE(VLSEGFF_MASK) 4093 NODE_NAME_CASE(READ_VL) 4094 NODE_NAME_CASE(VSLIDEUP) 4095 NODE_NAME_CASE(VSLIDEDOWN) 4096 NODE_NAME_CASE(VID) 4097 } 4098 // clang-format on 4099 return nullptr; 4100 #undef NODE_NAME_CASE 4101 } 4102 4103 /// getConstraintType - Given a constraint letter, return the type of 4104 /// constraint it is for this target. 4105 RISCVTargetLowering::ConstraintType 4106 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 4107 if (Constraint.size() == 1) { 4108 switch (Constraint[0]) { 4109 default: 4110 break; 4111 case 'f': 4112 return C_RegisterClass; 4113 case 'I': 4114 case 'J': 4115 case 'K': 4116 return C_Immediate; 4117 case 'A': 4118 return C_Memory; 4119 } 4120 } 4121 return TargetLowering::getConstraintType(Constraint); 4122 } 4123 4124 std::pair<unsigned, const TargetRegisterClass *> 4125 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 4126 StringRef Constraint, 4127 MVT VT) const { 4128 // First, see if this is a constraint that directly corresponds to a 4129 // RISCV register class. 4130 if (Constraint.size() == 1) { 4131 switch (Constraint[0]) { 4132 case 'r': 4133 return std::make_pair(0U, &RISCV::GPRRegClass); 4134 case 'f': 4135 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 4136 return std::make_pair(0U, &RISCV::FPR16RegClass); 4137 if (Subtarget.hasStdExtF() && VT == MVT::f32) 4138 return std::make_pair(0U, &RISCV::FPR32RegClass); 4139 if (Subtarget.hasStdExtD() && VT == MVT::f64) 4140 return std::make_pair(0U, &RISCV::FPR64RegClass); 4141 break; 4142 default: 4143 break; 4144 } 4145 } 4146 4147 // Clang will correctly decode the usage of register name aliases into their 4148 // official names. However, other frontends like `rustc` do not. This allows 4149 // users of these frontends to use the ABI names for registers in LLVM-style 4150 // register constraints. 
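  // For example, a constraint string of "{a0}" coming from such a frontend is
  // resolved to RISCV::X10 by the switch below rather than falling through to
  // the generic (record-name based) handling.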
4151 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 4152 .Case("{zero}", RISCV::X0) 4153 .Case("{ra}", RISCV::X1) 4154 .Case("{sp}", RISCV::X2) 4155 .Case("{gp}", RISCV::X3) 4156 .Case("{tp}", RISCV::X4) 4157 .Case("{t0}", RISCV::X5) 4158 .Case("{t1}", RISCV::X6) 4159 .Case("{t2}", RISCV::X7) 4160 .Cases("{s0}", "{fp}", RISCV::X8) 4161 .Case("{s1}", RISCV::X9) 4162 .Case("{a0}", RISCV::X10) 4163 .Case("{a1}", RISCV::X11) 4164 .Case("{a2}", RISCV::X12) 4165 .Case("{a3}", RISCV::X13) 4166 .Case("{a4}", RISCV::X14) 4167 .Case("{a5}", RISCV::X15) 4168 .Case("{a6}", RISCV::X16) 4169 .Case("{a7}", RISCV::X17) 4170 .Case("{s2}", RISCV::X18) 4171 .Case("{s3}", RISCV::X19) 4172 .Case("{s4}", RISCV::X20) 4173 .Case("{s5}", RISCV::X21) 4174 .Case("{s6}", RISCV::X22) 4175 .Case("{s7}", RISCV::X23) 4176 .Case("{s8}", RISCV::X24) 4177 .Case("{s9}", RISCV::X25) 4178 .Case("{s10}", RISCV::X26) 4179 .Case("{s11}", RISCV::X27) 4180 .Case("{t3}", RISCV::X28) 4181 .Case("{t4}", RISCV::X29) 4182 .Case("{t5}", RISCV::X30) 4183 .Case("{t6}", RISCV::X31) 4184 .Default(RISCV::NoRegister); 4185 if (XRegFromAlias != RISCV::NoRegister) 4186 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 4187 4188 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 4189 // TableGen record rather than the AsmName to choose registers for InlineAsm 4190 // constraints, plus we want to match those names to the widest floating point 4191 // register type available, manually select floating point registers here. 4192 // 4193 // The second case is the ABI name of the register, so that frontends can also 4194 // use the ABI names in register constraint lists. 4195 if (Subtarget.hasStdExtF()) { 4196 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 4197 .Cases("{f0}", "{ft0}", RISCV::F0_F) 4198 .Cases("{f1}", "{ft1}", RISCV::F1_F) 4199 .Cases("{f2}", "{ft2}", RISCV::F2_F) 4200 .Cases("{f3}", "{ft3}", RISCV::F3_F) 4201 .Cases("{f4}", "{ft4}", RISCV::F4_F) 4202 .Cases("{f5}", "{ft5}", RISCV::F5_F) 4203 .Cases("{f6}", "{ft6}", RISCV::F6_F) 4204 .Cases("{f7}", "{ft7}", RISCV::F7_F) 4205 .Cases("{f8}", "{fs0}", RISCV::F8_F) 4206 .Cases("{f9}", "{fs1}", RISCV::F9_F) 4207 .Cases("{f10}", "{fa0}", RISCV::F10_F) 4208 .Cases("{f11}", "{fa1}", RISCV::F11_F) 4209 .Cases("{f12}", "{fa2}", RISCV::F12_F) 4210 .Cases("{f13}", "{fa3}", RISCV::F13_F) 4211 .Cases("{f14}", "{fa4}", RISCV::F14_F) 4212 .Cases("{f15}", "{fa5}", RISCV::F15_F) 4213 .Cases("{f16}", "{fa6}", RISCV::F16_F) 4214 .Cases("{f17}", "{fa7}", RISCV::F17_F) 4215 .Cases("{f18}", "{fs2}", RISCV::F18_F) 4216 .Cases("{f19}", "{fs3}", RISCV::F19_F) 4217 .Cases("{f20}", "{fs4}", RISCV::F20_F) 4218 .Cases("{f21}", "{fs5}", RISCV::F21_F) 4219 .Cases("{f22}", "{fs6}", RISCV::F22_F) 4220 .Cases("{f23}", "{fs7}", RISCV::F23_F) 4221 .Cases("{f24}", "{fs8}", RISCV::F24_F) 4222 .Cases("{f25}", "{fs9}", RISCV::F25_F) 4223 .Cases("{f26}", "{fs10}", RISCV::F26_F) 4224 .Cases("{f27}", "{fs11}", RISCV::F27_F) 4225 .Cases("{f28}", "{ft8}", RISCV::F28_F) 4226 .Cases("{f29}", "{ft9}", RISCV::F29_F) 4227 .Cases("{f30}", "{ft10}", RISCV::F30_F) 4228 .Cases("{f31}", "{ft11}", RISCV::F31_F) 4229 .Default(RISCV::NoRegister); 4230 if (FReg != RISCV::NoRegister) { 4231 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 4232 if (Subtarget.hasStdExtD()) { 4233 unsigned RegNo = FReg - RISCV::F0_F; 4234 unsigned DReg = RISCV::F0_D + RegNo; 4235 return std::make_pair(DReg, &RISCV::FPR64RegClass); 4236 } 4237 return std::make_pair(FReg, 
&RISCV::FPR32RegClass); 4238 } 4239 } 4240 4241 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 4242 } 4243 4244 unsigned 4245 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 4246 // Currently only support length 1 constraints. 4247 if (ConstraintCode.size() == 1) { 4248 switch (ConstraintCode[0]) { 4249 case 'A': 4250 return InlineAsm::Constraint_A; 4251 default: 4252 break; 4253 } 4254 } 4255 4256 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 4257 } 4258 4259 void RISCVTargetLowering::LowerAsmOperandForConstraint( 4260 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 4261 SelectionDAG &DAG) const { 4262 // Currently only support length 1 constraints. 4263 if (Constraint.length() == 1) { 4264 switch (Constraint[0]) { 4265 case 'I': 4266 // Validate & create a 12-bit signed immediate operand. 4267 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4268 uint64_t CVal = C->getSExtValue(); 4269 if (isInt<12>(CVal)) 4270 Ops.push_back( 4271 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 4272 } 4273 return; 4274 case 'J': 4275 // Validate & create an integer zero operand. 4276 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 4277 if (C->getZExtValue() == 0) 4278 Ops.push_back( 4279 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 4280 return; 4281 case 'K': 4282 // Validate & create a 5-bit unsigned immediate operand. 4283 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4284 uint64_t CVal = C->getZExtValue(); 4285 if (isUInt<5>(CVal)) 4286 Ops.push_back( 4287 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 4288 } 4289 return; 4290 default: 4291 break; 4292 } 4293 } 4294 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 4295 } 4296 4297 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 4298 Instruction *Inst, 4299 AtomicOrdering Ord) const { 4300 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 4301 return Builder.CreateFence(Ord); 4302 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 4303 return Builder.CreateFence(AtomicOrdering::Release); 4304 return nullptr; 4305 } 4306 4307 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 4308 Instruction *Inst, 4309 AtomicOrdering Ord) const { 4310 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 4311 return Builder.CreateFence(AtomicOrdering::Acquire); 4312 return nullptr; 4313 } 4314 4315 TargetLowering::AtomicExpansionKind 4316 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 4317 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 4318 // point operations can't be used in an lr/sc sequence without breaking the 4319 // forward-progress guarantee. 
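  // Sub-word (i8/i16) atomicrmw operations are expanded to the masked
  // intrinsics below because the A extension only provides word and
  // doubleword AMO/LR/SC instructions.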
4320 if (AI->isFloatingPointOperation()) 4321 return AtomicExpansionKind::CmpXChg; 4322 4323 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 4324 if (Size == 8 || Size == 16) 4325 return AtomicExpansionKind::MaskedIntrinsic; 4326 return AtomicExpansionKind::None; 4327 } 4328 4329 static Intrinsic::ID 4330 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 4331 if (XLen == 32) { 4332 switch (BinOp) { 4333 default: 4334 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4335 case AtomicRMWInst::Xchg: 4336 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 4337 case AtomicRMWInst::Add: 4338 return Intrinsic::riscv_masked_atomicrmw_add_i32; 4339 case AtomicRMWInst::Sub: 4340 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 4341 case AtomicRMWInst::Nand: 4342 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 4343 case AtomicRMWInst::Max: 4344 return Intrinsic::riscv_masked_atomicrmw_max_i32; 4345 case AtomicRMWInst::Min: 4346 return Intrinsic::riscv_masked_atomicrmw_min_i32; 4347 case AtomicRMWInst::UMax: 4348 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 4349 case AtomicRMWInst::UMin: 4350 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 4351 } 4352 } 4353 4354 if (XLen == 64) { 4355 switch (BinOp) { 4356 default: 4357 llvm_unreachable("Unexpected AtomicRMW BinOp"); 4358 case AtomicRMWInst::Xchg: 4359 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 4360 case AtomicRMWInst::Add: 4361 return Intrinsic::riscv_masked_atomicrmw_add_i64; 4362 case AtomicRMWInst::Sub: 4363 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 4364 case AtomicRMWInst::Nand: 4365 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 4366 case AtomicRMWInst::Max: 4367 return Intrinsic::riscv_masked_atomicrmw_max_i64; 4368 case AtomicRMWInst::Min: 4369 return Intrinsic::riscv_masked_atomicrmw_min_i64; 4370 case AtomicRMWInst::UMax: 4371 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 4372 case AtomicRMWInst::UMin: 4373 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 4374 } 4375 } 4376 4377 llvm_unreachable("Unexpected XLen\n"); 4378 } 4379 4380 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 4381 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 4382 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 4383 unsigned XLen = Subtarget.getXLen(); 4384 Value *Ordering = 4385 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 4386 Type *Tys[] = {AlignedAddr->getType()}; 4387 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 4388 AI->getModule(), 4389 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 4390 4391 if (XLen == 64) { 4392 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 4393 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 4394 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 4395 } 4396 4397 Value *Result; 4398 4399 // Must pass the shift amount needed to sign extend the loaded value prior 4400 // to performing a signed comparison for min/max. ShiftAmt is the number of 4401 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 4402 // is the number of bits to left+right shift the value in order to 4403 // sign-extend. 
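  // For example, an i8 atomicrmw max on RV32 whose byte lives at bit offset 16
  // of the aligned word has ShiftAmt == 16 and ValWidth == 8, so the extra
  // shift amount passed to the intrinsic is (32 - 8) - 16 == 8.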
4404 if (AI->getOperation() == AtomicRMWInst::Min || 4405 AI->getOperation() == AtomicRMWInst::Max) { 4406 const DataLayout &DL = AI->getModule()->getDataLayout(); 4407 unsigned ValWidth = 4408 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 4409 Value *SextShamt = 4410 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 4411 Result = Builder.CreateCall(LrwOpScwLoop, 4412 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 4413 } else { 4414 Result = 4415 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 4416 } 4417 4418 if (XLen == 64) 4419 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 4420 return Result; 4421 } 4422 4423 TargetLowering::AtomicExpansionKind 4424 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 4425 AtomicCmpXchgInst *CI) const { 4426 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 4427 if (Size == 8 || Size == 16) 4428 return AtomicExpansionKind::MaskedIntrinsic; 4429 return AtomicExpansionKind::None; 4430 } 4431 4432 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 4433 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 4434 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 4435 unsigned XLen = Subtarget.getXLen(); 4436 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 4437 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 4438 if (XLen == 64) { 4439 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 4440 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 4441 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 4442 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 4443 } 4444 Type *Tys[] = {AlignedAddr->getType()}; 4445 Function *MaskedCmpXchg = 4446 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 4447 Value *Result = Builder.CreateCall( 4448 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 4449 if (XLen == 64) 4450 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 4451 return Result; 4452 } 4453 4454 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 4455 EVT VT) const { 4456 VT = VT.getScalarType(); 4457 4458 if (!VT.isSimple()) 4459 return false; 4460 4461 switch (VT.getSimpleVT().SimpleTy) { 4462 case MVT::f16: 4463 return Subtarget.hasStdExtZfh(); 4464 case MVT::f32: 4465 return Subtarget.hasStdExtF(); 4466 case MVT::f64: 4467 return Subtarget.hasStdExtD(); 4468 default: 4469 break; 4470 } 4471 4472 return false; 4473 } 4474 4475 Register RISCVTargetLowering::getExceptionPointerRegister( 4476 const Constant *PersonalityFn) const { 4477 return RISCV::X10; 4478 } 4479 4480 Register RISCVTargetLowering::getExceptionSelectorRegister( 4481 const Constant *PersonalityFn) const { 4482 return RISCV::X11; 4483 } 4484 4485 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 4486 // Return false to suppress the unnecessary extensions if the LibCall 4487 // arguments or return value is f32 type for LP64 ABI. 
4488 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 4489 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 4490 return false; 4491 4492 return true; 4493 } 4494 4495 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 4496 if (Subtarget.is64Bit() && Type == MVT::i32) 4497 return true; 4498 4499 return IsSigned; 4500 } 4501 4502 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 4503 SDValue C) const { 4504 // Check integral scalar types. 4505 if (VT.isScalarInteger()) { 4506 // Omit the optimization if the sub target has the M extension and the data 4507 // size exceeds XLen. 4508 if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) 4509 return false; 4510 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 4511 // Break the MUL to a SLLI and an ADD/SUB. 4512 const APInt &Imm = ConstNode->getAPIntValue(); 4513 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 4514 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 4515 return true; 4516 // Omit the following optimization if the sub target has the M extension 4517 // and the data size >= XLen. 4518 if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) 4519 return false; 4520 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 4521 // a pair of LUI/ADDI. 4522 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { 4523 APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); 4524 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 4525 (1 - ImmS).isPowerOf2()) 4526 return true; 4527 } 4528 } 4529 } 4530 4531 return false; 4532 } 4533 4534 #define GET_REGISTER_MATCHER 4535 #include "RISCVGenAsmMatcher.inc" 4536 4537 Register 4538 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 4539 const MachineFunction &MF) const { 4540 Register Reg = MatchRegisterAltName(RegName); 4541 if (Reg == RISCV::NoRegister) 4542 Reg = MatchRegisterName(RegName); 4543 if (Reg == RISCV::NoRegister) 4544 report_fatal_error( 4545 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 4546 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 4547 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 4548 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 4549 StringRef(RegName) + "\".")); 4550 return Reg; 4551 } 4552 4553 namespace llvm { 4554 namespace RISCVVIntrinsicsTable { 4555 4556 #define GET_RISCVVIntrinsicsTable_IMPL 4557 #include "RISCVGenSearchableTables.inc" 4558 4559 } // namespace RISCVVIntrinsicsTable 4560 4561 namespace RISCVZvlssegTable { 4562 4563 #define GET_RISCVZvlssegTable_IMPL 4564 #include "RISCVGenSearchableTables.inc" 4565 4566 } // namespace RISCVZvlssegTable 4567 } // namespace llvm 4568