1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "Utils/RISCVMatInt.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/IR/IntrinsicsRISCV.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "riscv-lower" 41 42 STATISTIC(NumTailCalls, "Number of tail calls"); 43 44 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 45 const RISCVSubtarget &STI) 46 : TargetLowering(TM), Subtarget(STI) { 47 48 if (Subtarget.isRV32E()) 49 report_fatal_error("Codegen not yet implemented for RV32E"); 50 51 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 52 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 53 54 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 55 !Subtarget.hasStdExtF()) { 56 errs() << "Hard-float 'f' ABI can't be used for a target that " 57 "doesn't support the F instruction set extension (ignoring " 58 "target-abi)\n"; 59 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 60 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 61 !Subtarget.hasStdExtD()) { 62 errs() << "Hard-float 'd' ABI can't be used for a target that " 63 "doesn't support the D instruction set extension (ignoring " 64 "target-abi)\n"; 65 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 66 } 67 68 switch (ABI) { 69 default: 70 report_fatal_error("Don't know how to lower this ABI"); 71 case RISCVABI::ABI_ILP32: 72 case RISCVABI::ABI_ILP32F: 73 case RISCVABI::ABI_ILP32D: 74 case RISCVABI::ABI_LP64: 75 case RISCVABI::ABI_LP64F: 76 case RISCVABI::ABI_LP64D: 77 break; 78 } 79 80 MVT XLenVT = Subtarget.getXLenVT(); 81 82 // Set up the register classes. 
83 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 84 85 if (Subtarget.hasStdExtZfh()) 86 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 87 if (Subtarget.hasStdExtF()) 88 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 89 if (Subtarget.hasStdExtD()) 90 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 91 92 if (Subtarget.hasStdExtV()) { 93 addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass); 94 addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass); 95 addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass); 96 addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass); 97 addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass); 98 addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass); 99 addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass); 100 101 addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass); 102 addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass); 103 addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass); 104 addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass); 105 addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass); 106 addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass); 107 addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass); 108 109 addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass); 110 addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass); 111 addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass); 112 addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass); 113 addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass); 114 addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass); 115 116 addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass); 117 addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass); 118 addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass); 119 addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass); 120 addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass); 121 122 addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass); 123 addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass); 124 addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass); 125 addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass); 126 127 if (Subtarget.hasStdExtZfh()) { 128 addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass); 129 addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass); 130 addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass); 131 addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass); 132 addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass); 133 addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass); 134 } 135 136 if (Subtarget.hasStdExtF()) { 137 addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass); 138 addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass); 139 addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass); 140 addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass); 141 addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass); 142 } 143 144 if (Subtarget.hasStdExtD()) { 145 addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass); 146 addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass); 147 addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass); 148 addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass); 149 } 150 } 151 152 // Compute derived properties from the register classes. 
153 computeRegisterProperties(STI.getRegisterInfo()); 154 155 setStackPointerRegisterToSaveRestore(RISCV::X2); 156 157 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 158 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 159 160 // TODO: add all necessary setOperationAction calls. 161 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 162 163 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 164 setOperationAction(ISD::BR_CC, XLenVT, Expand); 165 setOperationAction(ISD::SELECT, XLenVT, Custom); 166 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 167 168 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 169 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 170 171 setOperationAction(ISD::VASTART, MVT::Other, Custom); 172 setOperationAction(ISD::VAARG, MVT::Other, Expand); 173 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 174 setOperationAction(ISD::VAEND, MVT::Other, Expand); 175 176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 177 if (!Subtarget.hasStdExtZbb()) { 178 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 179 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 180 } 181 182 if (Subtarget.is64Bit()) { 183 setOperationAction(ISD::ADD, MVT::i32, Custom); 184 setOperationAction(ISD::SUB, MVT::i32, Custom); 185 setOperationAction(ISD::SHL, MVT::i32, Custom); 186 setOperationAction(ISD::SRA, MVT::i32, Custom); 187 setOperationAction(ISD::SRL, MVT::i32, Custom); 188 } 189 190 if (!Subtarget.hasStdExtM()) { 191 setOperationAction(ISD::MUL, XLenVT, Expand); 192 setOperationAction(ISD::MULHS, XLenVT, Expand); 193 setOperationAction(ISD::MULHU, XLenVT, Expand); 194 setOperationAction(ISD::SDIV, XLenVT, Expand); 195 setOperationAction(ISD::UDIV, XLenVT, Expand); 196 setOperationAction(ISD::SREM, XLenVT, Expand); 197 setOperationAction(ISD::UREM, XLenVT, Expand); 198 } 199 200 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 201 setOperationAction(ISD::MUL, MVT::i32, Custom); 202 setOperationAction(ISD::SDIV, MVT::i32, Custom); 203 setOperationAction(ISD::UDIV, MVT::i32, Custom); 204 setOperationAction(ISD::UREM, MVT::i32, Custom); 205 } 206 207 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 208 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 209 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 210 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 211 212 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 213 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 214 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 215 216 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 217 if (Subtarget.is64Bit()) { 218 setOperationAction(ISD::ROTL, MVT::i32, Custom); 219 setOperationAction(ISD::ROTR, MVT::i32, Custom); 220 } 221 } else { 222 setOperationAction(ISD::ROTL, XLenVT, Expand); 223 setOperationAction(ISD::ROTR, XLenVT, Expand); 224 } 225 226 if (Subtarget.hasStdExtZbp()) { 227 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 228 setOperationAction(ISD::BSWAP, XLenVT, Custom); 229 230 if (Subtarget.is64Bit()) { 231 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 232 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 233 } 234 } else { 235 setOperationAction(ISD::BSWAP, XLenVT, Expand); 236 } 237 238 if (Subtarget.hasStdExtZbb()) { 239 setOperationAction(ISD::SMIN, XLenVT, Legal); 240 setOperationAction(ISD::SMAX, XLenVT, Legal); 241 setOperationAction(ISD::UMIN, XLenVT, Legal); 242 setOperationAction(ISD::UMAX, XLenVT, Legal); 243 } else { 244 setOperationAction(ISD::CTTZ, 
XLenVT, Expand); 245 setOperationAction(ISD::CTLZ, XLenVT, Expand); 246 setOperationAction(ISD::CTPOP, XLenVT, Expand); 247 } 248 249 if (Subtarget.hasStdExtZbt()) { 250 setOperationAction(ISD::FSHL, XLenVT, Legal); 251 setOperationAction(ISD::FSHR, XLenVT, Legal); 252 253 if (Subtarget.is64Bit()) { 254 setOperationAction(ISD::FSHL, MVT::i32, Custom); 255 setOperationAction(ISD::FSHR, MVT::i32, Custom); 256 } 257 } 258 259 ISD::CondCode FPCCToExpand[] = { 260 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 261 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 262 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 263 264 ISD::NodeType FPOpToExpand[] = { 265 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 266 ISD::FP_TO_FP16}; 267 268 if (Subtarget.hasStdExtZfh()) 269 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 270 271 if (Subtarget.hasStdExtZfh()) { 272 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 273 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 274 for (auto CC : FPCCToExpand) 275 setCondCodeAction(CC, MVT::f16, Expand); 276 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 277 setOperationAction(ISD::SELECT, MVT::f16, Custom); 278 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 279 for (auto Op : FPOpToExpand) 280 setOperationAction(Op, MVT::f16, Expand); 281 } 282 283 if (Subtarget.hasStdExtF()) { 284 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 285 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 286 for (auto CC : FPCCToExpand) 287 setCondCodeAction(CC, MVT::f32, Expand); 288 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 289 setOperationAction(ISD::SELECT, MVT::f32, Custom); 290 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 291 for (auto Op : FPOpToExpand) 292 setOperationAction(Op, MVT::f32, Expand); 293 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 294 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 295 } 296 297 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 298 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 299 300 if (Subtarget.hasStdExtD()) { 301 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 302 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 303 for (auto CC : FPCCToExpand) 304 setCondCodeAction(CC, MVT::f64, Expand); 305 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 306 setOperationAction(ISD::SELECT, MVT::f64, Custom); 307 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 308 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 309 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 310 for (auto Op : FPOpToExpand) 311 setOperationAction(Op, MVT::f64, Expand); 312 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 313 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 314 } 315 316 if (Subtarget.is64Bit()) { 317 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 318 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 319 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 320 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 321 } 322 323 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 324 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 325 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 326 setOperationAction(ISD::JumpTable, XLenVT, Custom); 327 328 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 329 330 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 331 // Unfortunately this can't be determined just from the ISA naming string. 
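  // On RV64 the entire 64-bit counter can be read with a single csrr of
  // cycle, so READCYCLECOUNTER is Legal there. On RV32 it is custom-legalised
  // to READ_CYCLE_WIDE (see ReplaceNodeResults), which reads cycle and cycleh
  // and guards against the low half rolling over between the reads, roughly:
  //   again: csrr a1, cycleh
  //          csrr a0, cycle
  //          csrr a2, cycleh
  //          bne  a1, a2, again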
332 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 333 Subtarget.is64Bit() ? Legal : Custom); 334 335 setOperationAction(ISD::TRAP, MVT::Other, Legal); 336 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 337 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 338 339 if (Subtarget.hasStdExtA()) { 340 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 341 setMinCmpXchgSizeInBits(32); 342 } else { 343 setMaxAtomicSizeInBitsSupported(0); 344 } 345 346 setBooleanContents(ZeroOrOneBooleanContent); 347 348 if (Subtarget.hasStdExtV()) { 349 setBooleanVectorContents(ZeroOrOneBooleanContent); 350 351 // RVV intrinsics may have illegal operands. 352 // We also need to custom legalize vmv.x.s. 353 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 354 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 355 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 356 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 357 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 358 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 359 360 if (Subtarget.is64Bit()) { 361 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 362 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 363 } 364 365 for (auto VT : MVT::integer_scalable_vector_valuetypes()) 366 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 367 368 // We must custom-lower SPLAT_VECTOR vXi64 on RV32 369 if (!Subtarget.is64Bit()) 370 setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom); 371 } 372 373 // Function alignments. 374 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); 375 setMinFunctionAlignment(FunctionAlignment); 376 setPrefFunctionAlignment(FunctionAlignment); 377 378 setMinimumJumpTableEntries(5); 379 380 // Jumps are expensive, compared to logic 381 setJumpIsExpensive(); 382 383 // We can use any register for comparisons 384 setHasMultipleConditionRegisters(); 385 386 if (Subtarget.hasStdExtZbp()) { 387 setTargetDAGCombine(ISD::OR); 388 } 389 } 390 391 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 392 EVT VT) const { 393 if (!VT.isVector()) 394 return getPointerTy(DL); 395 return VT.changeVectorElementTypeToInteger(); 396 } 397 398 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 399 const CallInst &I, 400 MachineFunction &MF, 401 unsigned Intrinsic) const { 402 switch (Intrinsic) { 403 default: 404 return false; 405 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 406 case Intrinsic::riscv_masked_atomicrmw_add_i32: 407 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 408 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 409 case Intrinsic::riscv_masked_atomicrmw_max_i32: 410 case Intrinsic::riscv_masked_atomicrmw_min_i32: 411 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 412 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 413 case Intrinsic::riscv_masked_cmpxchg_i32: 414 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 415 Info.opc = ISD::INTRINSIC_W_CHAIN; 416 Info.memVT = MVT::getVT(PtrTy->getElementType()); 417 Info.ptrVal = I.getArgOperand(0); 418 Info.offset = 0; 419 Info.align = Align(4); 420 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 421 MachineMemOperand::MOVolatile; 422 return true; 423 } 424 } 425 426 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 427 const AddrMode &AM, Type *Ty, 428 unsigned AS, 429 Instruction *I) const { 430 // No global is ever allowed as a base. 
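  // RISC-V load/store instructions only support a base register plus a signed
  // 12-bit immediate, e.g. "lw a0, -4(a1)", so anything else is rejected
  // below.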
431 if (AM.BaseGV) 432 return false; 433 434 // Require a 12-bit signed offset. 435 if (!isInt<12>(AM.BaseOffs)) 436 return false; 437 438 switch (AM.Scale) { 439 case 0: // "r+i" or just "i", depending on HasBaseReg. 440 break; 441 case 1: 442 if (!AM.HasBaseReg) // allow "r+i". 443 break; 444 return false; // disallow "r+r" or "r+r+i". 445 default: 446 return false; 447 } 448 449 return true; 450 } 451 452 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 453 return isInt<12>(Imm); 454 } 455 456 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 457 return isInt<12>(Imm); 458 } 459 460 // On RV32, 64-bit integers are split into their high and low parts and held 461 // in two different registers, so the trunc is free since the low register can 462 // just be used. 463 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 464 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 465 return false; 466 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 467 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 468 return (SrcBits == 64 && DestBits == 32); 469 } 470 471 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 472 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 473 !SrcVT.isInteger() || !DstVT.isInteger()) 474 return false; 475 unsigned SrcBits = SrcVT.getSizeInBits(); 476 unsigned DestBits = DstVT.getSizeInBits(); 477 return (SrcBits == 64 && DestBits == 32); 478 } 479 480 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 481 // Zexts are free if they can be combined with a load. 482 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 483 EVT MemVT = LD->getMemoryVT(); 484 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 485 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 486 (LD->getExtensionType() == ISD::NON_EXTLOAD || 487 LD->getExtensionType() == ISD::ZEXTLOAD)) 488 return true; 489 } 490 491 return TargetLowering::isZExtFree(Val, VT2); 492 } 493 494 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 495 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 496 } 497 498 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 499 return Subtarget.hasStdExtZbb(); 500 } 501 502 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 503 return Subtarget.hasStdExtZbb(); 504 } 505 506 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 507 bool ForCodeSize) const { 508 if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) 509 return false; 510 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 511 return false; 512 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 513 return false; 514 if (Imm.isNegZero()) 515 return false; 516 return Imm.isZero(); 517 } 518 519 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 520 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 521 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 522 (VT == MVT::f64 && Subtarget.hasStdExtD()); 523 } 524 525 // Changes the condition code and swaps operands if necessary, so the SetCC 526 // operation matches one of the comparisons supported directly in the RISC-V 527 // ISA. 
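// For example, (setcc a, b, setgt) is rewritten as (setcc b, a, setlt), which
// maps directly onto BLT.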
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
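    // (e.g. for XLEN=32 this gives a GREVI immediate of 31 for BITREVERSE and
    // 24 for BSWAP; for XLEN=64, 63 and 56 respectively.)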
630 if (Op.getOpcode() == ISD::BSWAP) 631 Imm &= ~0x7U; 632 return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0), 633 DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT())); 634 } 635 case ISD::SPLAT_VECTOR: 636 return lowerSPLATVECTOR(Op, DAG); 637 } 638 } 639 640 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 641 SelectionDAG &DAG, unsigned Flags) { 642 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 643 } 644 645 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 646 SelectionDAG &DAG, unsigned Flags) { 647 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 648 Flags); 649 } 650 651 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 652 SelectionDAG &DAG, unsigned Flags) { 653 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 654 N->getOffset(), Flags); 655 } 656 657 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 658 SelectionDAG &DAG, unsigned Flags) { 659 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 660 } 661 662 template <class NodeTy> 663 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 664 bool IsLocal) const { 665 SDLoc DL(N); 666 EVT Ty = getPointerTy(DAG.getDataLayout()); 667 668 if (isPositionIndependent()) { 669 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 670 if (IsLocal) 671 // Use PC-relative addressing to access the symbol. This generates the 672 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 673 // %pcrel_lo(auipc)). 674 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 675 676 // Use PC-relative addressing to access the GOT for this symbol, then load 677 // the address from the GOT. This generates the pattern (PseudoLA sym), 678 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 679 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 680 } 681 682 switch (getTargetMachine().getCodeModel()) { 683 default: 684 report_fatal_error("Unsupported code model for lowering"); 685 case CodeModel::Small: { 686 // Generate a sequence for accessing addresses within the first 2 GiB of 687 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 688 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 689 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 690 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 691 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 692 } 693 case CodeModel::Medium: { 694 // Generate a sequence for accessing addresses within any 2GiB range within 695 // the address space. This generates the pattern (PseudoLLA sym), which 696 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 
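    // In assembly this typically looks like, for example:
    //   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
    //                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)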
697 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 698 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 699 } 700 } 701 } 702 703 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 704 SelectionDAG &DAG) const { 705 SDLoc DL(Op); 706 EVT Ty = Op.getValueType(); 707 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 708 int64_t Offset = N->getOffset(); 709 MVT XLenVT = Subtarget.getXLenVT(); 710 711 const GlobalValue *GV = N->getGlobal(); 712 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 713 SDValue Addr = getAddr(N, DAG, IsLocal); 714 715 // In order to maximise the opportunity for common subexpression elimination, 716 // emit a separate ADD node for the global address offset instead of folding 717 // it in the global address node. Later peephole optimisations may choose to 718 // fold it back in when profitable. 719 if (Offset != 0) 720 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 721 DAG.getConstant(Offset, DL, XLenVT)); 722 return Addr; 723 } 724 725 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 726 SelectionDAG &DAG) const { 727 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 728 729 return getAddr(N, DAG); 730 } 731 732 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 733 SelectionDAG &DAG) const { 734 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 735 736 return getAddr(N, DAG); 737 } 738 739 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 740 SelectionDAG &DAG) const { 741 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 742 743 return getAddr(N, DAG); 744 } 745 746 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 747 SelectionDAG &DAG, 748 bool UseGOT) const { 749 SDLoc DL(N); 750 EVT Ty = getPointerTy(DAG.getDataLayout()); 751 const GlobalValue *GV = N->getGlobal(); 752 MVT XLenVT = Subtarget.getXLenVT(); 753 754 if (UseGOT) { 755 // Use PC-relative addressing to access the GOT for this TLS symbol, then 756 // load the address from the GOT and add the thread pointer. This generates 757 // the pattern (PseudoLA_TLS_IE sym), which expands to 758 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 759 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 760 SDValue Load = 761 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 762 763 // Add the thread pointer. 764 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 765 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 766 } 767 768 // Generate a sequence for accessing the address relative to the thread 769 // pointer, with the appropriate adjustment for the thread pointer offset. 
770 // This generates the pattern 771 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 772 SDValue AddrHi = 773 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 774 SDValue AddrAdd = 775 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 776 SDValue AddrLo = 777 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 778 779 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 780 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 781 SDValue MNAdd = SDValue( 782 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 783 0); 784 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 785 } 786 787 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 788 SelectionDAG &DAG) const { 789 SDLoc DL(N); 790 EVT Ty = getPointerTy(DAG.getDataLayout()); 791 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 792 const GlobalValue *GV = N->getGlobal(); 793 794 // Use a PC-relative addressing mode to access the global dynamic GOT address. 795 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 796 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 797 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 798 SDValue Load = 799 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 800 801 // Prepare argument list to generate call. 802 ArgListTy Args; 803 ArgListEntry Entry; 804 Entry.Node = Load; 805 Entry.Ty = CallTy; 806 Args.push_back(Entry); 807 808 // Setup call to __tls_get_addr. 809 TargetLowering::CallLoweringInfo CLI(DAG); 810 CLI.setDebugLoc(DL) 811 .setChain(DAG.getEntryNode()) 812 .setLibCallee(CallingConv::C, CallTy, 813 DAG.getExternalSymbol("__tls_get_addr", Ty), 814 std::move(Args)); 815 816 return LowerCallTo(CLI).first; 817 } 818 819 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 820 SelectionDAG &DAG) const { 821 SDLoc DL(Op); 822 EVT Ty = Op.getValueType(); 823 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 824 int64_t Offset = N->getOffset(); 825 MVT XLenVT = Subtarget.getXLenVT(); 826 827 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 828 829 if (DAG.getMachineFunction().getFunction().getCallingConv() == 830 CallingConv::GHC) 831 report_fatal_error("In GHC calling convention TLS is not supported"); 832 833 SDValue Addr; 834 switch (Model) { 835 case TLSModel::LocalExec: 836 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 837 break; 838 case TLSModel::InitialExec: 839 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 840 break; 841 case TLSModel::LocalDynamic: 842 case TLSModel::GeneralDynamic: 843 Addr = getDynamicTLSAddr(N, DAG); 844 break; 845 } 846 847 // In order to maximise the opportunity for common subexpression elimination, 848 // emit a separate ADD node for the global address offset instead of folding 849 // it in the global address node. Later peephole optimisations may choose to 850 // fold it back in when profitable. 
851 if (Offset != 0) 852 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 853 DAG.getConstant(Offset, DL, XLenVT)); 854 return Addr; 855 } 856 857 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 858 SDValue CondV = Op.getOperand(0); 859 SDValue TrueV = Op.getOperand(1); 860 SDValue FalseV = Op.getOperand(2); 861 SDLoc DL(Op); 862 MVT XLenVT = Subtarget.getXLenVT(); 863 864 // If the result type is XLenVT and CondV is the output of a SETCC node 865 // which also operated on XLenVT inputs, then merge the SETCC node into the 866 // lowered RISCVISD::SELECT_CC to take advantage of the integer 867 // compare+branch instructions. i.e.: 868 // (select (setcc lhs, rhs, cc), truev, falsev) 869 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 870 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 871 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 872 SDValue LHS = CondV.getOperand(0); 873 SDValue RHS = CondV.getOperand(1); 874 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 875 ISD::CondCode CCVal = CC->get(); 876 877 normaliseSetCC(LHS, RHS, CCVal); 878 879 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 880 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 881 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 882 } 883 884 // Otherwise: 885 // (select condv, truev, falsev) 886 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 887 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 888 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 889 890 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 891 892 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 893 } 894 895 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 896 MachineFunction &MF = DAG.getMachineFunction(); 897 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 898 899 SDLoc DL(Op); 900 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 901 getPointerTy(MF.getDataLayout())); 902 903 // vastart just stores the address of the VarArgsFrameIndex slot into the 904 // memory location argument. 
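  // On RISC-V, va_list is a single pointer into the vararg save area, so no
  // further initialisation is needed here.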
905 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 906 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 907 MachinePointerInfo(SV)); 908 } 909 910 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 911 SelectionDAG &DAG) const { 912 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 913 MachineFunction &MF = DAG.getMachineFunction(); 914 MachineFrameInfo &MFI = MF.getFrameInfo(); 915 MFI.setFrameAddressIsTaken(true); 916 Register FrameReg = RI.getFrameRegister(MF); 917 int XLenInBytes = Subtarget.getXLen() / 8; 918 919 EVT VT = Op.getValueType(); 920 SDLoc DL(Op); 921 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 922 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 923 while (Depth--) { 924 int Offset = -(XLenInBytes * 2); 925 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 926 DAG.getIntPtrConstant(Offset, DL)); 927 FrameAddr = 928 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 929 } 930 return FrameAddr; 931 } 932 933 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 934 SelectionDAG &DAG) const { 935 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 936 MachineFunction &MF = DAG.getMachineFunction(); 937 MachineFrameInfo &MFI = MF.getFrameInfo(); 938 MFI.setReturnAddressIsTaken(true); 939 MVT XLenVT = Subtarget.getXLenVT(); 940 int XLenInBytes = Subtarget.getXLen() / 8; 941 942 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 943 return SDValue(); 944 945 EVT VT = Op.getValueType(); 946 SDLoc DL(Op); 947 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 948 if (Depth) { 949 int Off = -XLenInBytes; 950 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 951 SDValue Offset = DAG.getConstant(Off, DL, VT); 952 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 953 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 954 MachinePointerInfo()); 955 } 956 957 // Return the value of the return address register, marking it an implicit 958 // live-in. 
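  // (The return address register is x1/ra on RISC-V.)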
959 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 960 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 961 } 962 963 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 964 SelectionDAG &DAG) const { 965 SDLoc DL(Op); 966 SDValue Lo = Op.getOperand(0); 967 SDValue Hi = Op.getOperand(1); 968 SDValue Shamt = Op.getOperand(2); 969 EVT VT = Lo.getValueType(); 970 971 // if Shamt-XLEN < 0: // Shamt < XLEN 972 // Lo = Lo << Shamt 973 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 974 // else: 975 // Lo = 0 976 // Hi = Lo << (Shamt-XLEN) 977 978 SDValue Zero = DAG.getConstant(0, DL, VT); 979 SDValue One = DAG.getConstant(1, DL, VT); 980 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 981 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 982 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 983 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 984 985 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 986 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 987 SDValue ShiftRightLo = 988 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 989 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 990 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 991 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 992 993 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 994 995 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 996 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 997 998 SDValue Parts[2] = {Lo, Hi}; 999 return DAG.getMergeValues(Parts, DL); 1000 } 1001 1002 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 1003 bool IsSRA) const { 1004 SDLoc DL(Op); 1005 SDValue Lo = Op.getOperand(0); 1006 SDValue Hi = Op.getOperand(1); 1007 SDValue Shamt = Op.getOperand(2); 1008 EVT VT = Lo.getValueType(); 1009 1010 // SRA expansion: 1011 // if Shamt-XLEN < 0: // Shamt < XLEN 1012 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1013 // Hi = Hi >>s Shamt 1014 // else: 1015 // Lo = Hi >>s (Shamt-XLEN); 1016 // Hi = Hi >>s (XLEN-1) 1017 // 1018 // SRL expansion: 1019 // if Shamt-XLEN < 0: // Shamt < XLEN 1020 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 1021 // Hi = Hi >>u Shamt 1022 // else: 1023 // Lo = Hi >>u (Shamt-XLEN); 1024 // Hi = 0; 1025 1026 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 1027 1028 SDValue Zero = DAG.getConstant(0, DL, VT); 1029 SDValue One = DAG.getConstant(1, DL, VT); 1030 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 1031 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 1032 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 1033 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 1034 1035 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 1036 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 1037 SDValue ShiftLeftHi = 1038 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 1039 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 1040 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 1041 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 1042 SDValue HiFalse = 1043 IsSRA ? 
      DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VecVT = Op.getValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR lowering");
  SDValue SplatVal = Op.getOperand(0);

  // If we can prove that the value is a sign-extended 32-bit value, lower this
  // as a custom node in order to try and match RVV vector/scalar instructions.
  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
    if (isInt<32>(CVal->getSExtValue()))
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
  }

  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
  //   vmv.v.x vX, hi
  //   vsll.vx vX, vX, /*32*/
  //   vmv.v.x vY, lo
  //   vsll.vx vY, vY, /*32*/
  //   vsrl.vx vY, vY, /*32*/
  //   vor.vv vX, vX, vY
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);

  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);

  if (isNullConstant(Hi))
    return Lo;

  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);

  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        assert(II->ExtendedOperand < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[II->ExtendedOperand];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                             Operands);
        }
      }
    }
  }

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  }
}

SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);

  if (Subtarget.hasStdExtV()) {
    // Some RVV intrinsics may claim that they want an integer operand to be
    // extended.
    if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
            RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
      if (II->ExtendedOperand) {
        // The operands start from the second argument in INTRINSIC_W_CHAIN.
        unsigned ExtendOp = II->ExtendedOperand + 1;
        assert(ExtendOp < Op.getNumOperands());
        SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
        SDValue &ScalarOp = Operands[ExtendOp];
        EVT OpVT = ScalarOp.getValueType();
        if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
            (OpVT == MVT::i32 && Subtarget.is64Bit())) {
          // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
          // FIXME: Should we ignore the upper bits in isel instead?
          unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
                                                          : ISD::ANY_EXTEND;
          ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
          return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
                             Operands);
        }
      }
    }
  }

  return SDValue();
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  case RISCVISD::GREVI:
    return RISCVISD::GREVIW;
  case RISCVISD::GORCI:
    return RISCVISD::GORCIW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later on because the fact that the operation was originally of
// type i32 is lost.
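// For example, on RV64 (i32 (srl x, y)) becomes
// (trunc (SRLW (any_extend x), (any_extend y))), which selects to a single
// srlw.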
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics to reduce the number of sign-extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ?
N->getOperand(0) : SDValue(); 1270 SDValue Result; 1271 std::tie(Result, Chain) = 1272 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 1273 Results.push_back(Result); 1274 if (IsStrict) 1275 Results.push_back(Chain); 1276 break; 1277 } 1278 case ISD::READCYCLECOUNTER: { 1279 assert(!Subtarget.is64Bit() && 1280 "READCYCLECOUNTER only has custom type legalization on riscv32"); 1281 1282 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 1283 SDValue RCW = 1284 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 1285 1286 Results.push_back( 1287 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 1288 Results.push_back(RCW.getValue(2)); 1289 break; 1290 } 1291 case ISD::ADD: 1292 case ISD::SUB: 1293 case ISD::MUL: 1294 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1295 "Unexpected custom legalisation"); 1296 if (N->getOperand(1).getOpcode() == ISD::Constant) 1297 return; 1298 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 1299 break; 1300 case ISD::SHL: 1301 case ISD::SRA: 1302 case ISD::SRL: 1303 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1304 "Unexpected custom legalisation"); 1305 if (N->getOperand(1).getOpcode() == ISD::Constant) 1306 return; 1307 Results.push_back(customLegalizeToWOp(N, DAG)); 1308 break; 1309 case ISD::ROTL: 1310 case ISD::ROTR: 1311 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1312 "Unexpected custom legalisation"); 1313 Results.push_back(customLegalizeToWOp(N, DAG)); 1314 break; 1315 case ISD::SDIV: 1316 case ISD::UDIV: 1317 case ISD::UREM: 1318 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1319 Subtarget.hasStdExtM() && "Unexpected custom legalisation"); 1320 if (N->getOperand(0).getOpcode() == ISD::Constant || 1321 N->getOperand(1).getOpcode() == ISD::Constant) 1322 return; 1323 Results.push_back(customLegalizeToWOp(N, DAG)); 1324 break; 1325 case ISD::BITCAST: { 1326 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1327 Subtarget.hasStdExtF()) || 1328 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && 1329 "Unexpected custom legalisation"); 1330 SDValue Op0 = N->getOperand(0); 1331 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { 1332 if (Op0.getValueType() != MVT::f16) 1333 return; 1334 SDValue FPConv = 1335 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); 1336 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 1337 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1338 Subtarget.hasStdExtF()) { 1339 if (Op0.getValueType() != MVT::f32) 1340 return; 1341 SDValue FPConv = 1342 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 1343 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 1344 } 1345 break; 1346 } 1347 case RISCVISD::GREVI: 1348 case RISCVISD::GORCI: { 1349 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1350 "Unexpected custom legalisation"); 1351 // This is similar to customLegalizeToWOp, except that we pass the second 1352 // operand (a TargetConstant) straight through: it is already of type 1353 // XLenVT. 1354 SDLoc DL(N); 1355 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 1356 SDValue NewOp0 = 1357 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1358 SDValue NewRes = 1359 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); 1360 // ReplaceNodeResults requires we maintain the same type for the return 1361 // value. 
1362 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 1363 break; 1364 } 1365 case ISD::BSWAP: 1366 case ISD::BITREVERSE: { 1367 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1368 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 1369 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 1370 N->getOperand(0)); 1371 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; 1372 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, 1373 DAG.getTargetConstant(Imm, DL, 1374 Subtarget.getXLenVT())); 1375 // ReplaceNodeResults requires we maintain the same type for the return 1376 // value. 1377 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); 1378 break; 1379 } 1380 case ISD::FSHL: 1381 case ISD::FSHR: { 1382 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 1383 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 1384 SDValue NewOp0 = 1385 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 1386 SDValue NewOp1 = 1387 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 1388 SDValue NewOp2 = 1389 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 1390 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 1391 // Mask the shift amount to 5 bits. 1392 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 1393 DAG.getConstant(0x1f, DL, MVT::i64)); 1394 unsigned Opc = 1395 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 1396 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 1397 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 1398 break; 1399 } 1400 case ISD::INTRINSIC_WO_CHAIN: { 1401 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 1402 switch (IntNo) { 1403 default: 1404 llvm_unreachable( 1405 "Don't know how to custom type legalize this intrinsic!"); 1406 case Intrinsic::riscv_vmv_x_s: { 1407 EVT VT = N->getValueType(0); 1408 assert((VT == MVT::i8 || VT == MVT::i16 || 1409 (Subtarget.is64Bit() && VT == MVT::i32)) && 1410 "Unexpected custom legalisation!"); 1411 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 1412 Subtarget.getXLenVT(), N->getOperand(1)); 1413 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 1414 break; 1415 } 1416 } 1417 break; 1418 } 1419 } 1420 } 1421 1422 // A structure to hold one of the bit-manipulation patterns below. 
Together, a 1423 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 1424 // (or (and (shl x, 1), 0xAAAAAAAA), 1425 // (and (srl x, 1), 0x55555555)) 1426 struct RISCVBitmanipPat { 1427 SDValue Op; 1428 unsigned ShAmt; 1429 bool IsSHL; 1430 1431 bool formsPairWith(const RISCVBitmanipPat &Other) const { 1432 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 1433 } 1434 }; 1435 1436 // Matches any of the following bit-manipulation patterns: 1437 // (and (shl x, 1), (0x55555555 << 1)) 1438 // (and (srl x, 1), 0x55555555) 1439 // (shl (and x, 0x55555555), 1) 1440 // (srl (and x, (0x55555555 << 1)), 1) 1441 // where the shift amount and mask may vary thus: 1442 // [1] = 0x55555555 / 0xAAAAAAAA 1443 // [2] = 0x33333333 / 0xCCCCCCCC 1444 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 1445 // [8] = 0x00FF00FF / 0xFF00FF00 1446 // [16] = 0x0000FFFF / 0xFFFFFFFF 1447 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 1448 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) { 1449 Optional<uint64_t> Mask; 1450 // Optionally consume a mask around the shift operation. 1451 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 1452 Mask = Op.getConstantOperandVal(1); 1453 Op = Op.getOperand(0); 1454 } 1455 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 1456 return None; 1457 bool IsSHL = Op.getOpcode() == ISD::SHL; 1458 1459 if (!isa<ConstantSDNode>(Op.getOperand(1))) 1460 return None; 1461 auto ShAmt = Op.getConstantOperandVal(1); 1462 1463 if (!isPowerOf2_64(ShAmt)) 1464 return None; 1465 1466 // These are the unshifted masks which we use to match bit-manipulation 1467 // patterns. They may be shifted left in certain circumstances. 1468 static const uint64_t BitmanipMasks[] = { 1469 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 1470 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL, 1471 }; 1472 1473 unsigned MaskIdx = Log2_64(ShAmt); 1474 if (MaskIdx >= array_lengthof(BitmanipMasks)) 1475 return None; 1476 1477 auto Src = Op.getOperand(0); 1478 1479 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 1480 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 1481 1482 // The expected mask is shifted left when the AND is found around SHL 1483 // patterns. 1484 // ((x >> 1) & 0x55555555) 1485 // ((x << 1) & 0xAAAAAAAA) 1486 bool SHLExpMask = IsSHL; 1487 1488 if (!Mask) { 1489 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 1490 // the mask is all ones: consume that now. 1491 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 1492 Mask = Src.getConstantOperandVal(1); 1493 Src = Src.getOperand(0); 1494 // The expected mask is now in fact shifted left for SRL, so reverse the 1495 // decision. 1496 // ((x & 0xAAAAAAAA) >> 1) 1497 // ((x & 0x55555555) << 1) 1498 SHLExpMask = !SHLExpMask; 1499 } else { 1500 // Use a default shifted mask of all-ones if there's no AND, truncated 1501 // down to the expected width. This simplifies the logic later on. 1502 Mask = maskTrailingOnes<uint64_t>(Width); 1503 *Mask &= (IsSHL ? 
                *Mask << ShAmt : *Mask >> ShAmt);
1504     }
1505   }
1506
1507   if (SHLExpMask)
1508     ExpMask <<= ShAmt;
1509
1510   if (Mask != ExpMask)
1511     return None;
1512
1513   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
1514 }
1515
1516 // Match the following pattern as a GREVI(W) operation
1517 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
1518 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1519                                const RISCVSubtarget &Subtarget) {
1520   EVT VT = Op.getValueType();
1521
1522   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1523     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1524     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1525     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1526       SDLoc DL(Op);
1527       return DAG.getNode(
1528           RISCVISD::GREVI, DL, VT, LHS->Op,
1529           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1530     }
1531   }
1532   return SDValue();
1533 }
1534
1535 // Matches any of the following patterns as a GORCI(W) operation:
1536 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
1537 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
1538 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
1539 // Note that with the variant of 3.,
1540 //   (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
1541 // the inner pattern will first be matched as GREVI and then the outer
1542 // pattern will be matched to GORC via the first rule above.
1543 // 4.  (or (rotl/rotr x, bitwidth/2), x)
1544 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
1545                                const RISCVSubtarget &Subtarget) {
1546   EVT VT = Op.getValueType();
1547
1548   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1549     SDLoc DL(Op);
1550     SDValue Op0 = Op.getOperand(0);
1551     SDValue Op1 = Op.getOperand(1);
1552
1553     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
1554       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
1555           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
1556         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
1557       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
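      // For example (an illustrative case, assuming a 32-bit type):
      //   (or (rotl x, 16), x) is equivalent to (GORCI x, 16),
      // since rotating by half the bitwidth ORs each half of x into the other.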
1558 if ((Reverse.getOpcode() == ISD::ROTL || 1559 Reverse.getOpcode() == ISD::ROTR) && 1560 Reverse.getOperand(0) == X && 1561 isa<ConstantSDNode>(Reverse.getOperand(1))) { 1562 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 1563 if (RotAmt == (VT.getSizeInBits() / 2)) 1564 return DAG.getNode( 1565 RISCVISD::GORCI, DL, VT, X, 1566 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); 1567 } 1568 return SDValue(); 1569 }; 1570 1571 // Check for either commutable permutation of (or (GREVI x, shamt), x) 1572 if (SDValue V = MatchOROfReverse(Op0, Op1)) 1573 return V; 1574 if (SDValue V = MatchOROfReverse(Op1, Op0)) 1575 return V; 1576 1577 // OR is commutable so canonicalize its OR operand to the left 1578 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 1579 std::swap(Op0, Op1); 1580 if (Op0.getOpcode() != ISD::OR) 1581 return SDValue(); 1582 SDValue OrOp0 = Op0.getOperand(0); 1583 SDValue OrOp1 = Op0.getOperand(1); 1584 auto LHS = matchRISCVBitmanipPat(OrOp0); 1585 // OR is commutable so swap the operands and try again: x might have been 1586 // on the left 1587 if (!LHS) { 1588 std::swap(OrOp0, OrOp1); 1589 LHS = matchRISCVBitmanipPat(OrOp0); 1590 } 1591 auto RHS = matchRISCVBitmanipPat(Op1); 1592 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 1593 return DAG.getNode( 1594 RISCVISD::GORCI, DL, VT, LHS->Op, 1595 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); 1596 } 1597 } 1598 return SDValue(); 1599 } 1600 1601 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 1602 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 1603 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 1604 // not undo itself, but they are redundant. 1605 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 1606 unsigned ShAmt1 = N->getConstantOperandVal(1); 1607 SDValue Src = N->getOperand(0); 1608 1609 if (Src.getOpcode() != N->getOpcode()) 1610 return SDValue(); 1611 1612 unsigned ShAmt2 = Src.getConstantOperandVal(1); 1613 Src = Src.getOperand(0); 1614 1615 unsigned CombinedShAmt; 1616 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) 1617 CombinedShAmt = ShAmt1 | ShAmt2; 1618 else 1619 CombinedShAmt = ShAmt1 ^ ShAmt2; 1620 1621 if (CombinedShAmt == 0) 1622 return Src; 1623 1624 SDLoc DL(N); 1625 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, 1626 DAG.getTargetConstant(CombinedShAmt, DL, 1627 N->getOperand(1).getValueType())); 1628 } 1629 1630 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 1631 DAGCombinerInfo &DCI) const { 1632 SelectionDAG &DAG = DCI.DAG; 1633 1634 switch (N->getOpcode()) { 1635 default: 1636 break; 1637 case RISCVISD::SplitF64: { 1638 SDValue Op0 = N->getOperand(0); 1639 // If the input to SplitF64 is just BuildPairF64 then the operation is 1640 // redundant. Instead, use BuildPairF64's operands directly. 1641 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 1642 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 1643 1644 SDLoc DL(N); 1645 1646 // It's cheaper to materialise two 32-bit integers than to load a double 1647 // from the constant pool and transfer it to integer registers through the 1648 // stack. 
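    // For example (illustrative), the f64 constant 1.0 has the bit pattern
    // 0x3FF0000000000000, so it splits into Lo = 0x00000000 and
    // Hi = 0x3FF00000 in the code below.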
1649 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 1650 APInt V = C->getValueAPF().bitcastToAPInt(); 1651 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 1652 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 1653 return DCI.CombineTo(N, Lo, Hi); 1654 } 1655 1656 // This is a target-specific version of a DAGCombine performed in 1657 // DAGCombiner::visitBITCAST. It performs the equivalent of: 1658 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 1659 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 1660 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 1661 !Op0.getNode()->hasOneUse()) 1662 break; 1663 SDValue NewSplitF64 = 1664 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 1665 Op0.getOperand(0)); 1666 SDValue Lo = NewSplitF64.getValue(0); 1667 SDValue Hi = NewSplitF64.getValue(1); 1668 APInt SignBit = APInt::getSignMask(32); 1669 if (Op0.getOpcode() == ISD::FNEG) { 1670 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 1671 DAG.getConstant(SignBit, DL, MVT::i32)); 1672 return DCI.CombineTo(N, Lo, NewHi); 1673 } 1674 assert(Op0.getOpcode() == ISD::FABS); 1675 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 1676 DAG.getConstant(~SignBit, DL, MVT::i32)); 1677 return DCI.CombineTo(N, Lo, NewHi); 1678 } 1679 case RISCVISD::SLLW: 1680 case RISCVISD::SRAW: 1681 case RISCVISD::SRLW: 1682 case RISCVISD::ROLW: 1683 case RISCVISD::RORW: { 1684 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 1685 SDValue LHS = N->getOperand(0); 1686 SDValue RHS = N->getOperand(1); 1687 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 1688 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 1689 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || 1690 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { 1691 if (N->getOpcode() != ISD::DELETED_NODE) 1692 DCI.AddToWorklist(N); 1693 return SDValue(N, 0); 1694 } 1695 break; 1696 } 1697 case RISCVISD::FSLW: 1698 case RISCVISD::FSRW: { 1699 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 1700 // read. 1701 SDValue Op0 = N->getOperand(0); 1702 SDValue Op1 = N->getOperand(1); 1703 SDValue ShAmt = N->getOperand(2); 1704 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 1705 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); 1706 if (SimplifyDemandedBits(Op0, OpMask, DCI) || 1707 SimplifyDemandedBits(Op1, OpMask, DCI) || 1708 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { 1709 if (N->getOpcode() != ISD::DELETED_NODE) 1710 DCI.AddToWorklist(N); 1711 return SDValue(N, 0); 1712 } 1713 break; 1714 } 1715 case RISCVISD::GREVIW: 1716 case RISCVISD::GORCIW: { 1717 // Only the lower 32 bits of the first operand are read 1718 SDValue Op0 = N->getOperand(0); 1719 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); 1720 if (SimplifyDemandedBits(Op0, Mask, DCI)) { 1721 if (N->getOpcode() != ISD::DELETED_NODE) 1722 DCI.AddToWorklist(N); 1723 return SDValue(N, 0); 1724 } 1725 1726 return combineGREVI_GORCI(N, DCI.DAG); 1727 } 1728 case RISCVISD::FMV_X_ANYEXTW_RV64: { 1729 SDLoc DL(N); 1730 SDValue Op0 = N->getOperand(0); 1731 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 1732 // conversion is unnecessary and can be replaced with an ANY_EXTEND 1733 // of the FMV_W_X_RV64 operand. 
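    // (The FMV_W_X_RV64 operand is already an i64 value, so it can simply be
    // returned here without creating an explicit ANY_EXTEND node.)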
1734 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 1735 assert(Op0.getOperand(0).getValueType() == MVT::i64 && 1736 "Unexpected value type!"); 1737 return Op0.getOperand(0); 1738 } 1739 1740 // This is a target-specific version of a DAGCombine performed in 1741 // DAGCombiner::visitBITCAST. It performs the equivalent of: 1742 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 1743 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 1744 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 1745 !Op0.getNode()->hasOneUse()) 1746 break; 1747 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 1748 Op0.getOperand(0)); 1749 APInt SignBit = APInt::getSignMask(32).sext(64); 1750 if (Op0.getOpcode() == ISD::FNEG) 1751 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 1752 DAG.getConstant(SignBit, DL, MVT::i64)); 1753 1754 assert(Op0.getOpcode() == ISD::FABS); 1755 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 1756 DAG.getConstant(~SignBit, DL, MVT::i64)); 1757 } 1758 case RISCVISD::GREVI: 1759 case RISCVISD::GORCI: 1760 return combineGREVI_GORCI(N, DCI.DAG); 1761 case ISD::OR: 1762 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) 1763 return GREV; 1764 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) 1765 return GORC; 1766 break; 1767 } 1768 1769 return SDValue(); 1770 } 1771 1772 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 1773 const SDNode *N, CombineLevel Level) const { 1774 // The following folds are only desirable if `(OP _, c1 << c2)` can be 1775 // materialised in fewer instructions than `(OP _, c1)`: 1776 // 1777 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 1778 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 1779 SDValue N0 = N->getOperand(0); 1780 EVT Ty = N0.getValueType(); 1781 if (Ty.isScalarInteger() && 1782 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 1783 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 1784 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1785 if (C1 && C2) { 1786 APInt C1Int = C1->getAPIntValue(); 1787 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 1788 1789 // We can materialise `c1 << c2` into an add immediate, so it's "free", 1790 // and the combine should happen, to potentially allow further combines 1791 // later. 1792 if (ShiftedC1Int.getMinSignedBits() <= 64 && 1793 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 1794 return true; 1795 1796 // We can materialise `c1` in an add immediate, so it's "free", and the 1797 // combine should be prevented. 1798 if (C1Int.getMinSignedBits() <= 64 && 1799 isLegalAddImmediate(C1Int.getSExtValue())) 1800 return false; 1801 1802 // Neither constant will fit into an immediate, so find materialisation 1803 // costs. 1804 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 1805 Subtarget.is64Bit()); 1806 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 1807 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 1808 1809 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 1810 // combine should be prevented. 
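      // If both materialisation costs are equal we fall through and allow the
      // combine, since the shifted form may enable further combines later.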
1811       if (C1Cost < ShiftedC1Cost)
1812         return false;
1813     }
1814   }
1815   return true;
1816 }
1817
1818 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
1819     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1820     unsigned Depth) const {
1821   switch (Op.getOpcode()) {
1822   default:
1823     break;
1824   case RISCVISD::SLLW:
1825   case RISCVISD::SRAW:
1826   case RISCVISD::SRLW:
1827   case RISCVISD::DIVW:
1828   case RISCVISD::DIVUW:
1829   case RISCVISD::REMUW:
1830   case RISCVISD::ROLW:
1831   case RISCVISD::RORW:
1832   case RISCVISD::GREVIW:
1833   case RISCVISD::GORCIW:
1834   case RISCVISD::FSLW:
1835   case RISCVISD::FSRW:
1836     // TODO: As the result is sign-extended, this is conservatively correct. A
1837     // more precise answer could be calculated for SRAW depending on known
1838     // bits in the shift amount.
1839     return 33;
1840   case RISCVISD::VMV_X_S:
1841     // The number of sign bits of the scalar result is computed by obtaining the
1842     // element type of the input vector operand, subtracting its width from the
1843     // XLEN, and then adding one (sign bit within the element type).
1844     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
1845   }
1846
1847   return 1;
1848 }
1849
1850 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
1851                                                   MachineBasicBlock *BB) {
1852   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
1853
1854   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
1855   // Should the count have wrapped while it was being read, we need to try
1856   // again.
1857   // ...
1858   // read:
1859   //   rdcycleh x3 # load high word of cycle
1860   //   rdcycle  x2 # load low word of cycle
1861   //   rdcycleh x4 # load high word of cycle
1862   //   bne x3, x4, read # check if high word reads match, otherwise try again
1863   // ...
1864
1865   MachineFunction &MF = *BB->getParent();
1866   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1867   MachineFunction::iterator It = ++BB->getIterator();
1868
1869   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1870   MF.insert(It, LoopMBB);
1871
1872   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1873   MF.insert(It, DoneMBB);
1874
1875   // Transfer the remainder of BB and its successor edges to DoneMBB.
1876 DoneMBB->splice(DoneMBB->begin(), BB, 1877 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 1878 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 1879 1880 BB->addSuccessor(LoopMBB); 1881 1882 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1883 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1884 Register LoReg = MI.getOperand(0).getReg(); 1885 Register HiReg = MI.getOperand(1).getReg(); 1886 DebugLoc DL = MI.getDebugLoc(); 1887 1888 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 1889 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 1890 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 1891 .addReg(RISCV::X0); 1892 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 1893 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 1894 .addReg(RISCV::X0); 1895 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 1896 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 1897 .addReg(RISCV::X0); 1898 1899 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 1900 .addReg(HiReg) 1901 .addReg(ReadAgainReg) 1902 .addMBB(LoopMBB); 1903 1904 LoopMBB->addSuccessor(LoopMBB); 1905 LoopMBB->addSuccessor(DoneMBB); 1906 1907 MI.eraseFromParent(); 1908 1909 return DoneMBB; 1910 } 1911 1912 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 1913 MachineBasicBlock *BB) { 1914 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 1915 1916 MachineFunction &MF = *BB->getParent(); 1917 DebugLoc DL = MI.getDebugLoc(); 1918 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1919 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 1920 Register LoReg = MI.getOperand(0).getReg(); 1921 Register HiReg = MI.getOperand(1).getReg(); 1922 Register SrcReg = MI.getOperand(2).getReg(); 1923 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 1924 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 1925 1926 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 1927 RI); 1928 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 1929 MachineMemOperand *MMOLo = 1930 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 1931 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 1932 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 1933 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 1934 .addFrameIndex(FI) 1935 .addImm(0) 1936 .addMemOperand(MMOLo); 1937 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 1938 .addFrameIndex(FI) 1939 .addImm(4) 1940 .addMemOperand(MMOHi); 1941 MI.eraseFromParent(); // The pseudo instruction is gone now. 
1942 return BB; 1943 } 1944 1945 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 1946 MachineBasicBlock *BB) { 1947 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 1948 "Unexpected instruction"); 1949 1950 MachineFunction &MF = *BB->getParent(); 1951 DebugLoc DL = MI.getDebugLoc(); 1952 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1953 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 1954 Register DstReg = MI.getOperand(0).getReg(); 1955 Register LoReg = MI.getOperand(1).getReg(); 1956 Register HiReg = MI.getOperand(2).getReg(); 1957 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 1958 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 1959 1960 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 1961 MachineMemOperand *MMOLo = 1962 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 1963 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 1964 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 1965 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 1966 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 1967 .addFrameIndex(FI) 1968 .addImm(0) 1969 .addMemOperand(MMOLo); 1970 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 1971 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 1972 .addFrameIndex(FI) 1973 .addImm(4) 1974 .addMemOperand(MMOHi); 1975 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 1976 MI.eraseFromParent(); // The pseudo instruction is gone now. 1977 return BB; 1978 } 1979 1980 static bool isSelectPseudo(MachineInstr &MI) { 1981 switch (MI.getOpcode()) { 1982 default: 1983 return false; 1984 case RISCV::Select_GPR_Using_CC_GPR: 1985 case RISCV::Select_FPR16_Using_CC_GPR: 1986 case RISCV::Select_FPR32_Using_CC_GPR: 1987 case RISCV::Select_FPR64_Using_CC_GPR: 1988 return true; 1989 } 1990 } 1991 1992 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 1993 MachineBasicBlock *BB) { 1994 // To "insert" Select_* instructions, we actually have to insert the triangle 1995 // control-flow pattern. The incoming instructions know the destination vreg 1996 // to set, the condition code register to branch on, the true/false values to 1997 // select between, and the condcode to use to select the appropriate branch. 1998 // 1999 // We produce the following control flow: 2000 // HeadMBB 2001 // | \ 2002 // | IfFalseMBB 2003 // | / 2004 // TailMBB 2005 // 2006 // When we find a sequence of selects we attempt to optimize their emission 2007 // by sharing the control flow. Currently we only handle cases where we have 2008 // multiple selects with the exact same condition (same LHS, RHS and CC). 2009 // The selects may be interleaved with other instructions if the other 2010 // instructions meet some requirements we deem safe: 2011 // - They are debug instructions. Otherwise, 2012 // - They do not have side-effects, do not access memory and their inputs do 2013 // not depend on the results of the select pseudo-instructions. 2014 // The TrueV/FalseV operands of the selects cannot depend on the result of 2015 // previous selects in the sequence. 2016 // These conditions could be further relaxed. See the X86 target for a 2017 // related approach and more information. 
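  // For example (an illustrative MIR sketch), two selects on the same
  // condition:
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  // share a single conditional branch and become two PHIs in TailMBB.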
2018 Register LHS = MI.getOperand(1).getReg(); 2019 Register RHS = MI.getOperand(2).getReg(); 2020 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 2021 2022 SmallVector<MachineInstr *, 4> SelectDebugValues; 2023 SmallSet<Register, 4> SelectDests; 2024 SelectDests.insert(MI.getOperand(0).getReg()); 2025 2026 MachineInstr *LastSelectPseudo = &MI; 2027 2028 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 2029 SequenceMBBI != E; ++SequenceMBBI) { 2030 if (SequenceMBBI->isDebugInstr()) 2031 continue; 2032 else if (isSelectPseudo(*SequenceMBBI)) { 2033 if (SequenceMBBI->getOperand(1).getReg() != LHS || 2034 SequenceMBBI->getOperand(2).getReg() != RHS || 2035 SequenceMBBI->getOperand(3).getImm() != CC || 2036 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 2037 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 2038 break; 2039 LastSelectPseudo = &*SequenceMBBI; 2040 SequenceMBBI->collectDebugValues(SelectDebugValues); 2041 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 2042 } else { 2043 if (SequenceMBBI->hasUnmodeledSideEffects() || 2044 SequenceMBBI->mayLoadOrStore()) 2045 break; 2046 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 2047 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 2048 })) 2049 break; 2050 } 2051 } 2052 2053 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 2054 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2055 DebugLoc DL = MI.getDebugLoc(); 2056 MachineFunction::iterator I = ++BB->getIterator(); 2057 2058 MachineBasicBlock *HeadMBB = BB; 2059 MachineFunction *F = BB->getParent(); 2060 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 2061 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 2062 2063 F->insert(I, IfFalseMBB); 2064 F->insert(I, TailMBB); 2065 2066 // Transfer debug instructions associated with the selects to TailMBB. 2067 for (MachineInstr *DebugInstr : SelectDebugValues) { 2068 TailMBB->push_back(DebugInstr->removeFromParent()); 2069 } 2070 2071 // Move all instructions after the sequence to TailMBB. 2072 TailMBB->splice(TailMBB->end(), HeadMBB, 2073 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 2074 // Update machine-CFG edges by transferring all successors of the current 2075 // block to the new block which will contain the Phi nodes for the selects. 2076 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 2077 // Set the successors for HeadMBB. 2078 HeadMBB->addSuccessor(IfFalseMBB); 2079 HeadMBB->addSuccessor(TailMBB); 2080 2081 // Insert appropriate branch. 2082 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 2083 2084 BuildMI(HeadMBB, DL, TII.get(Opcode)) 2085 .addReg(LHS) 2086 .addReg(RHS) 2087 .addMBB(TailMBB); 2088 2089 // IfFalseMBB just falls through to TailMBB. 2090 IfFalseMBB->addSuccessor(TailMBB); 2091 2092 // Create PHIs for all of the select pseudo-instructions. 
2093 auto SelectMBBI = MI.getIterator(); 2094 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 2095 auto InsertionPoint = TailMBB->begin(); 2096 while (SelectMBBI != SelectEnd) { 2097 auto Next = std::next(SelectMBBI); 2098 if (isSelectPseudo(*SelectMBBI)) { 2099 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 2100 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 2101 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 2102 .addReg(SelectMBBI->getOperand(4).getReg()) 2103 .addMBB(HeadMBB) 2104 .addReg(SelectMBBI->getOperand(5).getReg()) 2105 .addMBB(IfFalseMBB); 2106 SelectMBBI->eraseFromParent(); 2107 } 2108 SelectMBBI = Next; 2109 } 2110 2111 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 2112 return TailMBB; 2113 } 2114 2115 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, 2116 int VLIndex, unsigned SEWIndex, 2117 unsigned VLMul, bool WritesElement0) { 2118 MachineFunction &MF = *BB->getParent(); 2119 DebugLoc DL = MI.getDebugLoc(); 2120 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 2121 2122 unsigned SEW = MI.getOperand(SEWIndex).getImm(); 2123 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2124 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); 2125 2126 // LMUL should already be encoded correctly. 2127 RISCVVLMUL Multiplier = static_cast<RISCVVLMUL>(VLMul); 2128 2129 MachineRegisterInfo &MRI = MF.getRegInfo(); 2130 2131 // VL and VTYPE are alive here. 2132 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); 2133 2134 if (VLIndex >= 0) { 2135 // Set VL (rs1 != X0). 2136 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2137 MIB.addReg(DestReg, RegState::Define | RegState::Dead) 2138 .addReg(MI.getOperand(VLIndex).getReg()); 2139 } else 2140 // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). 2141 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) 2142 .addReg(RISCV::X0, RegState::Kill); 2143 2144 // Default to tail agnostic unless the destination is tied to a source. In 2145 // that case the user would have some control over the tail values. The tail 2146 // policy is also ignored on instructions that only update element 0 like 2147 // vmv.s.x or reductions so use agnostic there to match the common case. 2148 // FIXME: This is conservatively correct, but we might want to detect that 2149 // the input is undefined. 2150 bool TailAgnostic = true; 2151 if (MI.isRegTiedToUseOperand(0) && !WritesElement0) 2152 TailAgnostic = false; 2153 2154 // For simplicity we reuse the vtype representation here. 
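  // The immediate built below packs LMUL, SEW and the tail/mask agnostic bits
  // in the same layout as the vtype CSR.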
2155 MIB.addImm(RISCVVType::encodeVTYPE(Multiplier, ElementWidth, 2156 /*TailAgnostic*/ TailAgnostic, 2157 /*MaskAgnostic*/ false)); 2158 2159 // Remove (now) redundant operands from pseudo 2160 MI.getOperand(SEWIndex).setImm(-1); 2161 if (VLIndex >= 0) { 2162 MI.getOperand(VLIndex).setReg(RISCV::NoRegister); 2163 MI.getOperand(VLIndex).setIsKill(false); 2164 } 2165 2166 return BB; 2167 } 2168 2169 MachineBasicBlock * 2170 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 2171 MachineBasicBlock *BB) const { 2172 2173 if (const RISCVVPseudosTable::PseudoInfo *RVV = 2174 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode())) { 2175 int VLIndex = RVV->getVLIndex(); 2176 int SEWIndex = RVV->getSEWIndex(); 2177 bool WritesElement0 = RVV->writesElement0(); 2178 2179 assert(SEWIndex >= 0 && "SEWIndex must be >= 0"); 2180 return addVSetVL(MI, BB, VLIndex, SEWIndex, RVV->VLMul, WritesElement0); 2181 } 2182 2183 switch (MI.getOpcode()) { 2184 default: 2185 llvm_unreachable("Unexpected instr type to insert"); 2186 case RISCV::ReadCycleWide: 2187 assert(!Subtarget.is64Bit() && 2188 "ReadCycleWrite is only to be used on riscv32"); 2189 return emitReadCycleWidePseudo(MI, BB); 2190 case RISCV::Select_GPR_Using_CC_GPR: 2191 case RISCV::Select_FPR16_Using_CC_GPR: 2192 case RISCV::Select_FPR32_Using_CC_GPR: 2193 case RISCV::Select_FPR64_Using_CC_GPR: 2194 return emitSelectPseudo(MI, BB); 2195 case RISCV::BuildPairF64Pseudo: 2196 return emitBuildPairF64Pseudo(MI, BB); 2197 case RISCV::SplitF64Pseudo: 2198 return emitSplitF64Pseudo(MI, BB); 2199 } 2200 } 2201 2202 // Calling Convention Implementation. 2203 // The expectations for frontend ABI lowering vary from target to target. 2204 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 2205 // details, but this is a longer term goal. For now, we simply try to keep the 2206 // role of the frontend as simple and well-defined as possible. The rules can 2207 // be summarised as: 2208 // * Never split up large scalar arguments. We handle them here. 2209 // * If a hardfloat calling convention is being used, and the struct may be 2210 // passed in a pair of registers (fp+fp, int+fp), and both registers are 2211 // available, then pass as two separate arguments. If either the GPRs or FPRs 2212 // are exhausted, then pass according to the rule below. 2213 // * If a struct could never be passed in registers or directly in a stack 2214 // slot (as it is larger than 2*XLEN and the floating point rules don't 2215 // apply), then pass it using a pointer with the byval attribute. 2216 // * If a struct is less than 2*XLEN, then coerce to either a two-element 2217 // word-sized array or a 2*XLEN scalar (depending on alignment). 2218 // * The frontend can determine whether a struct is returned by reference or 2219 // not based on its size and fields. If it will be returned by reference, the 2220 // frontend must modify the prototype so a pointer with the sret annotation is 2221 // passed as the first argument. This is not necessary for large scalar 2222 // returns. 2223 // * Struct return values and varargs should be coerced to structs containing 2224 // register-size fields in the same situations they would be for fixed 2225 // arguments. 
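// For example (illustrative), under the hard-float ILP32D ABI a small
// struct { float f; int i; } is passed as two separate arguments (f in an
// FPR, i in a GPR) as long as both kinds of registers are still available.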
2226 2227 static const MCPhysReg ArgGPRs[] = { 2228 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 2229 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 2230 }; 2231 static const MCPhysReg ArgFPR16s[] = { 2232 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 2233 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 2234 }; 2235 static const MCPhysReg ArgFPR32s[] = { 2236 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 2237 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 2238 }; 2239 static const MCPhysReg ArgFPR64s[] = { 2240 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 2241 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 2242 }; 2243 // This is an interim calling convention and it may be changed in the future. 2244 static const MCPhysReg ArgVRs[] = { 2245 RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, RISCV::V20, 2246 RISCV::V21, RISCV::V22, RISCV::V23 2247 }; 2248 static const MCPhysReg ArgVRM2s[] = { 2249 RISCV::V16M2, RISCV::V18M2, RISCV::V20M2, RISCV::V22M2 2250 }; 2251 static const MCPhysReg ArgVRM4s[] = {RISCV::V16M4, RISCV::V20M4}; 2252 static const MCPhysReg ArgVRM8s[] = {RISCV::V16M8}; 2253 2254 // Pass a 2*XLEN argument that has been split into two XLEN values through 2255 // registers or the stack as necessary. 2256 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 2257 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 2258 MVT ValVT2, MVT LocVT2, 2259 ISD::ArgFlagsTy ArgFlags2) { 2260 unsigned XLenInBytes = XLen / 8; 2261 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2262 // At least one half can be passed via register. 2263 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 2264 VA1.getLocVT(), CCValAssign::Full)); 2265 } else { 2266 // Both halves must be passed on the stack, with proper alignment. 2267 Align StackAlign = 2268 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 2269 State.addLoc( 2270 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 2271 State.AllocateStack(XLenInBytes, StackAlign), 2272 VA1.getLocVT(), CCValAssign::Full)); 2273 State.addLoc(CCValAssign::getMem( 2274 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2275 LocVT2, CCValAssign::Full)); 2276 return false; 2277 } 2278 2279 if (Register Reg = State.AllocateReg(ArgGPRs)) { 2280 // The second half can also be passed via register. 2281 State.addLoc( 2282 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 2283 } else { 2284 // The second half is passed via the stack, without additional alignment. 2285 State.addLoc(CCValAssign::getMem( 2286 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 2287 LocVT2, CCValAssign::Full)); 2288 } 2289 2290 return false; 2291 } 2292 2293 // Implements the RISC-V calling convention. Returns true upon failure. 2294 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 2295 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 2296 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 2297 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 2298 Optional<unsigned> FirstMaskArgument) { 2299 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 2300 assert(XLen == 32 || XLen == 64); 2301 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 2302 2303 // Any return value split in to more than two values can't be returned 2304 // directly. 
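  // For example, an i128 return value on RV32 is split into four XLEN-sized
  // values, so it cannot be returned directly in registers.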
2305 if (IsRet && ValNo > 1) 2306 return true; 2307 2308 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 2309 // variadic argument, or if no F16/F32 argument registers are available. 2310 bool UseGPRForF16_F32 = true; 2311 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 2312 // variadic argument, or if no F64 argument registers are available. 2313 bool UseGPRForF64 = true; 2314 2315 switch (ABI) { 2316 default: 2317 llvm_unreachable("Unexpected ABI"); 2318 case RISCVABI::ABI_ILP32: 2319 case RISCVABI::ABI_LP64: 2320 break; 2321 case RISCVABI::ABI_ILP32F: 2322 case RISCVABI::ABI_LP64F: 2323 UseGPRForF16_F32 = !IsFixed; 2324 break; 2325 case RISCVABI::ABI_ILP32D: 2326 case RISCVABI::ABI_LP64D: 2327 UseGPRForF16_F32 = !IsFixed; 2328 UseGPRForF64 = !IsFixed; 2329 break; 2330 } 2331 2332 // FPR16, FPR32, and FPR64 alias each other. 2333 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 2334 UseGPRForF16_F32 = true; 2335 UseGPRForF64 = true; 2336 } 2337 2338 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 2339 // similar local variables rather than directly checking against the target 2340 // ABI. 2341 2342 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 2343 LocVT = XLenVT; 2344 LocInfo = CCValAssign::BCvt; 2345 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 2346 LocVT = MVT::i64; 2347 LocInfo = CCValAssign::BCvt; 2348 } 2349 2350 // If this is a variadic argument, the RISC-V calling convention requires 2351 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 2352 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 2353 // be used regardless of whether the original argument was split during 2354 // legalisation or not. The argument will not be passed by registers if the 2355 // original type is larger than 2*XLEN, so the register alignment rule does 2356 // not apply. 2357 unsigned TwoXLenInBytes = (2 * XLen) / 8; 2358 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 2359 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 2360 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 2361 // Skip 'odd' register if necessary. 2362 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 2363 State.AllocateReg(ArgGPRs); 2364 } 2365 2366 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 2367 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 2368 State.getPendingArgFlags(); 2369 2370 assert(PendingLocs.size() == PendingArgFlags.size() && 2371 "PendingLocs and PendingArgFlags out of sync"); 2372 2373 // Handle passing f64 on RV32D with a soft float ABI or when floating point 2374 // registers are exhausted. 2375 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 2376 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 2377 "Can't lower f64 if it is split"); 2378 // Depending on available argument GPRS, f64 may be passed in a pair of 2379 // GPRs, split between a GPR and the stack, or passed completely on the 2380 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 2381 // cases. 
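    // For example (illustrative), if only a7 (X17) remains free, the low half
    // of the f64 is passed in a7 and the high half is passed in a 4-byte
    // stack slot.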
2382 Register Reg = State.AllocateReg(ArgGPRs); 2383 LocVT = MVT::i32; 2384 if (!Reg) { 2385 unsigned StackOffset = State.AllocateStack(8, Align(8)); 2386 State.addLoc( 2387 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 2388 return false; 2389 } 2390 if (!State.AllocateReg(ArgGPRs)) 2391 State.AllocateStack(4, Align(4)); 2392 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2393 return false; 2394 } 2395 2396 // Split arguments might be passed indirectly, so keep track of the pending 2397 // values. 2398 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 2399 LocVT = XLenVT; 2400 LocInfo = CCValAssign::Indirect; 2401 PendingLocs.push_back( 2402 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 2403 PendingArgFlags.push_back(ArgFlags); 2404 if (!ArgFlags.isSplitEnd()) { 2405 return false; 2406 } 2407 } 2408 2409 // If the split argument only had two elements, it should be passed directly 2410 // in registers or on the stack. 2411 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 2412 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 2413 // Apply the normal calling convention rules to the first half of the 2414 // split argument. 2415 CCValAssign VA = PendingLocs[0]; 2416 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 2417 PendingLocs.clear(); 2418 PendingArgFlags.clear(); 2419 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 2420 ArgFlags); 2421 } 2422 2423 // Allocate to a register if possible, or else a stack slot. 2424 Register Reg; 2425 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 2426 Reg = State.AllocateReg(ArgFPR16s); 2427 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 2428 Reg = State.AllocateReg(ArgFPR32s); 2429 else if (ValVT == MVT::f64 && !UseGPRForF64) 2430 Reg = State.AllocateReg(ArgFPR64s); 2431 else if (ValVT.isScalableVector()) { 2432 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 2433 if (RC == &RISCV::VRRegClass) { 2434 // Assign the first mask argument to V0. 2435 // This is an interim calling convention and it may be changed in the 2436 // future. 2437 if (FirstMaskArgument.hasValue() && 2438 ValNo == FirstMaskArgument.getValue()) { 2439 Reg = State.AllocateReg(RISCV::V0); 2440 } else { 2441 Reg = State.AllocateReg(ArgVRs); 2442 } 2443 } else if (RC == &RISCV::VRM2RegClass) { 2444 Reg = State.AllocateReg(ArgVRM2s); 2445 } else if (RC == &RISCV::VRM4RegClass) { 2446 Reg = State.AllocateReg(ArgVRM4s); 2447 } else if (RC == &RISCV::VRM8RegClass) { 2448 Reg = State.AllocateReg(ArgVRM8s); 2449 } else { 2450 llvm_unreachable("Unhandled class register for ValueType"); 2451 } 2452 if (!Reg) { 2453 LocInfo = CCValAssign::Indirect; 2454 // Try using a GPR to pass the address 2455 Reg = State.AllocateReg(ArgGPRs); 2456 LocVT = XLenVT; 2457 } 2458 } else 2459 Reg = State.AllocateReg(ArgGPRs); 2460 unsigned StackOffset = 2461 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 2462 2463 // If we reach this point and PendingLocs is non-empty, we must be at the 2464 // end of a split argument that must be passed indirectly. 
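  // Every pending part is given the same location below (the register or
  // stack slot that will hold the address of the value); the parts were
  // recorded with CCValAssign::Indirect when they were first seen.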
2465 if (!PendingLocs.empty()) { 2466 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 2467 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 2468 2469 for (auto &It : PendingLocs) { 2470 if (Reg) 2471 It.convertToReg(Reg); 2472 else 2473 It.convertToMem(StackOffset); 2474 State.addLoc(It); 2475 } 2476 PendingLocs.clear(); 2477 PendingArgFlags.clear(); 2478 return false; 2479 } 2480 2481 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 2482 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && 2483 "Expected an XLenVT or scalable vector types at this stage"); 2484 2485 if (Reg) { 2486 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2487 return false; 2488 } 2489 2490 // When a floating-point value is passed on the stack, no bit-conversion is 2491 // needed. 2492 if (ValVT.isFloatingPoint()) { 2493 LocVT = ValVT; 2494 LocInfo = CCValAssign::Full; 2495 } 2496 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 2497 return false; 2498 } 2499 2500 template <typename ArgTy> 2501 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 2502 for (const auto &ArgIdx : enumerate(Args)) { 2503 MVT ArgVT = ArgIdx.value().VT; 2504 if (ArgVT.isScalableVector() && 2505 ArgVT.getVectorElementType().SimpleTy == MVT::i1) 2506 return ArgIdx.index(); 2507 } 2508 return None; 2509 } 2510 2511 void RISCVTargetLowering::analyzeInputArgs( 2512 MachineFunction &MF, CCState &CCInfo, 2513 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 2514 unsigned NumArgs = Ins.size(); 2515 FunctionType *FType = MF.getFunction().getFunctionType(); 2516 2517 Optional<unsigned> FirstMaskArgument; 2518 if (Subtarget.hasStdExtV()) 2519 FirstMaskArgument = preAssignMask(Ins); 2520 2521 for (unsigned i = 0; i != NumArgs; ++i) { 2522 MVT ArgVT = Ins[i].VT; 2523 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 2524 2525 Type *ArgTy = nullptr; 2526 if (IsRet) 2527 ArgTy = FType->getReturnType(); 2528 else if (Ins[i].isOrigArg()) 2529 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 2530 2531 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 2532 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 2533 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 2534 FirstMaskArgument)) { 2535 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 2536 << EVT(ArgVT).getEVTString() << '\n'); 2537 llvm_unreachable(nullptr); 2538 } 2539 } 2540 } 2541 2542 void RISCVTargetLowering::analyzeOutputArgs( 2543 MachineFunction &MF, CCState &CCInfo, 2544 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 2545 CallLoweringInfo *CLI) const { 2546 unsigned NumArgs = Outs.size(); 2547 2548 Optional<unsigned> FirstMaskArgument; 2549 if (Subtarget.hasStdExtV()) 2550 FirstMaskArgument = preAssignMask(Outs); 2551 2552 for (unsigned i = 0; i != NumArgs; i++) { 2553 MVT ArgVT = Outs[i].VT; 2554 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 2555 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 2556 2557 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 2558 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 2559 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 2560 FirstMaskArgument)) { 2561 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 2562 << EVT(ArgVT).getEVTString() << "\n"); 2563 llvm_unreachable(nullptr); 2564 } 2565 } 2566 } 2567 2568 // Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect 2569 // values. 2570 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 2571 const CCValAssign &VA, const SDLoc &DL) { 2572 switch (VA.getLocInfo()) { 2573 default: 2574 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2575 case CCValAssign::Full: 2576 break; 2577 case CCValAssign::BCvt: 2578 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 2579 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 2580 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2581 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 2582 else 2583 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 2584 break; 2585 } 2586 return Val; 2587 } 2588 2589 // The caller is responsible for loading the full value if the argument is 2590 // passed with CCValAssign::Indirect. 2591 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 2592 const CCValAssign &VA, const SDLoc &DL, 2593 const RISCVTargetLowering &TLI) { 2594 MachineFunction &MF = DAG.getMachineFunction(); 2595 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2596 EVT LocVT = VA.getLocVT(); 2597 SDValue Val; 2598 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 2599 Register VReg = RegInfo.createVirtualRegister(RC); 2600 RegInfo.addLiveIn(VA.getLocReg(), VReg); 2601 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 2602 2603 if (VA.getLocInfo() == CCValAssign::Indirect) 2604 return Val; 2605 2606 return convertLocVTToValVT(DAG, Val, VA, DL); 2607 } 2608 2609 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 2610 const CCValAssign &VA, const SDLoc &DL) { 2611 EVT LocVT = VA.getLocVT(); 2612 2613 switch (VA.getLocInfo()) { 2614 default: 2615 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2616 case CCValAssign::Full: 2617 break; 2618 case CCValAssign::BCvt: 2619 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 2620 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 2621 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 2622 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 2623 else 2624 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 2625 break; 2626 } 2627 return Val; 2628 } 2629 2630 // The caller is responsible for loading the full value if the argument is 2631 // passed with CCValAssign::Indirect. 
2632 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2633                                 const CCValAssign &VA, const SDLoc &DL) {
2634   MachineFunction &MF = DAG.getMachineFunction();
2635   MachineFrameInfo &MFI = MF.getFrameInfo();
2636   EVT LocVT = VA.getLocVT();
2637   EVT ValVT = VA.getValVT();
2638   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
2639   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2640                                  VA.getLocMemOffset(), /*Immutable=*/true);
2641   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2642   SDValue Val;
2643
2644   ISD::LoadExtType ExtType;
2645   switch (VA.getLocInfo()) {
2646   default:
2647     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2648   case CCValAssign::Full:
2649   case CCValAssign::Indirect:
2650   case CCValAssign::BCvt:
2651     ExtType = ISD::NON_EXTLOAD;
2652     break;
2653   }
2654   Val = DAG.getExtLoad(
2655       ExtType, DL, LocVT, Chain, FIN,
2656       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2657   return Val;
2658 }
2659
2660 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
2661                                        const CCValAssign &VA, const SDLoc &DL) {
2662   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
2663          "Unexpected VA");
2664   MachineFunction &MF = DAG.getMachineFunction();
2665   MachineFrameInfo &MFI = MF.getFrameInfo();
2666   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2667
2668   if (VA.isMemLoc()) {
2669     // f64 is passed on the stack.
2670     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
2671     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2672     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
2673                        MachinePointerInfo::getFixedStack(MF, FI));
2674   }
2675
2676   assert(VA.isRegLoc() && "Expected register VA assignment");
2677
2678   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2679   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
2680   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
2681   SDValue Hi;
2682   if (VA.getLocReg() == RISCV::X17) {
2683     // Second half of f64 is passed on the stack.
2684     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
2685     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2686     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
2687                      MachinePointerInfo::getFixedStack(MF, FI));
2688   } else {
2689     // Second half of f64 is passed in another GPR.
2690     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2691     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
2692     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
2693   }
2694   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
2695 }
2696
2697 // FastCC gives less than a 1% performance improvement on some particular
2698 // benchmarks, but it may still be of benefit in other cases.
2699 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
2700                             CCValAssign::LocInfo LocInfo,
2701                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
2702
2703   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2704     // X5 and X6 might be used for the save-restore libcalls.
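    // They are therefore omitted from the GPR list below.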
2705 static const MCPhysReg GPRList[] = { 2706 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 2707 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 2708 RISCV::X29, RISCV::X30, RISCV::X31}; 2709 if (unsigned Reg = State.AllocateReg(GPRList)) { 2710 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2711 return false; 2712 } 2713 } 2714 2715 if (LocVT == MVT::f16) { 2716 static const MCPhysReg FPR16List[] = { 2717 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 2718 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 2719 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 2720 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 2721 if (unsigned Reg = State.AllocateReg(FPR16List)) { 2722 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2723 return false; 2724 } 2725 } 2726 2727 if (LocVT == MVT::f32) { 2728 static const MCPhysReg FPR32List[] = { 2729 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 2730 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 2731 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 2732 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 2733 if (unsigned Reg = State.AllocateReg(FPR32List)) { 2734 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2735 return false; 2736 } 2737 } 2738 2739 if (LocVT == MVT::f64) { 2740 static const MCPhysReg FPR64List[] = { 2741 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 2742 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 2743 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 2744 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 2745 if (unsigned Reg = State.AllocateReg(FPR64List)) { 2746 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2747 return false; 2748 } 2749 } 2750 2751 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 2752 unsigned Offset4 = State.AllocateStack(4, Align(4)); 2753 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 2754 return false; 2755 } 2756 2757 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 2758 unsigned Offset5 = State.AllocateStack(8, Align(8)); 2759 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 2760 return false; 2761 } 2762 2763 return true; // CC didn't match. 2764 } 2765 2766 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 2767 CCValAssign::LocInfo LocInfo, 2768 ISD::ArgFlagsTy ArgFlags, CCState &State) { 2769 2770 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 2771 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 2772 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 2773 static const MCPhysReg GPRList[] = { 2774 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 2775 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 2776 if (unsigned Reg = State.AllocateReg(GPRList)) { 2777 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2778 return false; 2779 } 2780 } 2781 2782 if (LocVT == MVT::f32) { 2783 // Pass in STG registers: F1, ..., F6 2784 // fs0 ... 
fs5 2785 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 2786 RISCV::F18_F, RISCV::F19_F, 2787 RISCV::F20_F, RISCV::F21_F}; 2788 if (unsigned Reg = State.AllocateReg(FPR32List)) { 2789 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2790 return false; 2791 } 2792 } 2793 2794 if (LocVT == MVT::f64) { 2795 // Pass in STG registers: D1, ..., D6 2796 // fs6 ... fs11 2797 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 2798 RISCV::F24_D, RISCV::F25_D, 2799 RISCV::F26_D, RISCV::F27_D}; 2800 if (unsigned Reg = State.AllocateReg(FPR64List)) { 2801 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 2802 return false; 2803 } 2804 } 2805 2806 report_fatal_error("No registers left in GHC calling convention"); 2807 return true; 2808 } 2809 2810 // Transform physical registers into virtual registers. 2811 SDValue RISCVTargetLowering::LowerFormalArguments( 2812 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 2813 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 2814 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2815 2816 MachineFunction &MF = DAG.getMachineFunction(); 2817 2818 switch (CallConv) { 2819 default: 2820 report_fatal_error("Unsupported calling convention"); 2821 case CallingConv::C: 2822 case CallingConv::Fast: 2823 break; 2824 case CallingConv::GHC: 2825 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 2826 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 2827 report_fatal_error( 2828 "GHC calling convention requires the F and D instruction set extensions"); 2829 } 2830 2831 const Function &Func = MF.getFunction(); 2832 if (Func.hasFnAttribute("interrupt")) { 2833 if (!Func.arg_empty()) 2834 report_fatal_error( 2835 "Functions with the interrupt attribute cannot have arguments!"); 2836 2837 StringRef Kind = 2838 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 2839 2840 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 2841 report_fatal_error( 2842 "Function interrupt attribute argument not supported!"); 2843 } 2844 2845 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2846 MVT XLenVT = Subtarget.getXLenVT(); 2847 unsigned XLenInBytes = Subtarget.getXLen() / 8; 2848 // Used with vargs to acumulate store chains. 2849 std::vector<SDValue> OutChains; 2850 2851 // Assign locations to all of the incoming arguments. 2852 SmallVector<CCValAssign, 16> ArgLocs; 2853 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 2854 2855 if (CallConv == CallingConv::Fast) 2856 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); 2857 else if (CallConv == CallingConv::GHC) 2858 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 2859 else 2860 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); 2861 2862 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2863 CCValAssign &VA = ArgLocs[i]; 2864 SDValue ArgValue; 2865 // Passing f64 on RV32D with a soft float ABI must be handled as a special 2866 // case. 2867 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 2868 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 2869 else if (VA.isRegLoc()) 2870 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 2871 else 2872 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 2873 2874 if (VA.getLocInfo() == CCValAssign::Indirect) { 2875 // If the original argument was split and passed by reference (e.g. i128 2876 // on RV32), we need to load all parts of it here (using the same 2877 // address). 
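      // The load below fetches the first part; the loop that follows loads any
      // remaining parts from the same base address at their recorded
      // PartOffsets.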
2878 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 2879 MachinePointerInfo())); 2880 unsigned ArgIndex = Ins[i].OrigArgIndex; 2881 assert(Ins[i].PartOffset == 0); 2882 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 2883 CCValAssign &PartVA = ArgLocs[i + 1]; 2884 unsigned PartOffset = Ins[i + 1].PartOffset; 2885 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 2886 DAG.getIntPtrConstant(PartOffset, DL)); 2887 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 2888 MachinePointerInfo())); 2889 ++i; 2890 } 2891 continue; 2892 } 2893 InVals.push_back(ArgValue); 2894 } 2895 2896 if (IsVarArg) { 2897 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 2898 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 2899 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 2900 MachineFrameInfo &MFI = MF.getFrameInfo(); 2901 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2902 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 2903 2904 // Offset of the first variable argument from stack pointer, and size of 2905 // the vararg save area. For now, the varargs save area is either zero or 2906 // large enough to hold a0-a7. 2907 int VaArgOffset, VarArgsSaveSize; 2908 2909 // If all registers are allocated, then all varargs must be passed on the 2910 // stack and we don't need to save any argregs. 2911 if (ArgRegs.size() == Idx) { 2912 VaArgOffset = CCInfo.getNextStackOffset(); 2913 VarArgsSaveSize = 0; 2914 } else { 2915 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 2916 VaArgOffset = -VarArgsSaveSize; 2917 } 2918 2919 // Record the frame index of the first variable argument 2920 // which is a value necessary to VASTART. 2921 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 2922 RVFI->setVarArgsFrameIndex(FI); 2923 2924 // If saving an odd number of registers then create an extra stack slot to 2925 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 2926 // offsets to even-numbered registered remain 2*XLEN-aligned. 2927 if (Idx % 2) { 2928 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 2929 VarArgsSaveSize += XLenInBytes; 2930 } 2931 2932 // Copy the integer registers that may have been used for passing varargs 2933 // to the vararg save area. 2934 for (unsigned I = Idx; I < ArgRegs.size(); 2935 ++I, VaArgOffset += XLenInBytes) { 2936 const Register Reg = RegInfo.createVirtualRegister(RC); 2937 RegInfo.addLiveIn(ArgRegs[I], Reg); 2938 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 2939 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 2940 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2941 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 2942 MachinePointerInfo::getFixedStack(MF, FI)); 2943 cast<StoreSDNode>(Store.getNode()) 2944 ->getMemOperand() 2945 ->setValue((Value *)nullptr); 2946 OutChains.push_back(Store); 2947 } 2948 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 2949 } 2950 2951 // All stores are grouped in one node to allow the matching between 2952 // the size of Ins and InVals. This only happens for vararg functions. 2953 if (!OutChains.empty()) { 2954 OutChains.push_back(Chain); 2955 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 2956 } 2957 2958 return Chain; 2959 } 2960 2961 /// isEligibleForTailCallOptimization - Check whether the call is eligible 2962 /// for tail call optimization. 
2963 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2964 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2965 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2966 const SmallVector<CCValAssign, 16> &ArgLocs) const {
2967
2968 auto &Callee = CLI.Callee;
2969 auto CalleeCC = CLI.CallConv;
2970 auto &Outs = CLI.Outs;
2971 auto &Caller = MF.getFunction();
2972 auto CallerCC = Caller.getCallingConv();
2973
2974 // Exception-handling functions need a special set of instructions to
2975 // indicate a return to the hardware. Tail-calling another function would
2976 // probably break this.
2977 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2978 // should be expanded as new function attributes are introduced.
2979 if (Caller.hasFnAttribute("interrupt"))
2980 return false;
2981
2982 // Do not tail call opt if the stack is used to pass parameters.
2983 if (CCInfo.getNextStackOffset() != 0)
2984 return false;
2985
2986 // Do not tail call opt if any parameters need to be passed indirectly.
2987 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2988 // passed indirectly. So the address of the value will be passed in a
2989 // register, or if not available, then the address is put on the stack. To
2990 // pass a value indirectly, space on the stack often needs to be allocated
2991 // to store it. In this case the CCInfo.getNextStackOffset() != 0 check is
2992 // not enough and we also need to check whether any CCValAssign in ArgLocs
2993 // is CCValAssign::Indirect.
2994 for (auto &VA : ArgLocs)
2995 if (VA.getLocInfo() == CCValAssign::Indirect)
2996 return false;
2997
2998 // Do not tail call opt if either caller or callee uses struct return
2999 // semantics.
3000 auto IsCallerStructRet = Caller.hasStructRetAttr();
3001 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3002 if (IsCallerStructRet || IsCalleeStructRet)
3003 return false;
3004
3005 // Externally-defined functions with weak linkage should not be
3006 // tail-called. The behaviour of branch instructions in this situation (as
3007 // used for tail calls) is implementation-defined, so we cannot rely on the
3008 // linker replacing the tail call with a return.
3009 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3010 const GlobalValue *GV = G->getGlobal();
3011 if (GV->hasExternalWeakLinkage())
3012 return false;
3013 }
3014
3015 // The callee has to preserve all registers the caller needs to preserve.
3016 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3017 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3018 if (CalleeCC != CallerCC) {
3019 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3020 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3021 return false;
3022 }
3023
3024 // Byval parameters hand the function a pointer directly into the stack area
3025 // we want to reuse during a tail call. Working around this *is* possible
3026 // but less efficient and uglier in LowerCall.
3027 for (auto &Arg : Outs)
3028 if (Arg.Flags.isByVal())
3029 return false;
3030
3031 return true;
3032 }
3033
3034 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
3035 // and output parameter nodes.
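// As a rough sketch of the DAG produced for a non-tail call (illustrative
// only): callseq_start, a glued sequence of CopyToReg nodes for the argument
// registers, a RISCVISD::CALL node carrying those registers plus the
// call-preserved register mask, callseq_end, and finally CopyFromReg nodes
// for the returned values. Eligible tail calls emit RISCVISD::TAIL instead
// and skip the callseq and return-value handling.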
3036 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 3037 SmallVectorImpl<SDValue> &InVals) const { 3038 SelectionDAG &DAG = CLI.DAG; 3039 SDLoc &DL = CLI.DL; 3040 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3041 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3042 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3043 SDValue Chain = CLI.Chain; 3044 SDValue Callee = CLI.Callee; 3045 bool &IsTailCall = CLI.IsTailCall; 3046 CallingConv::ID CallConv = CLI.CallConv; 3047 bool IsVarArg = CLI.IsVarArg; 3048 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3049 MVT XLenVT = Subtarget.getXLenVT(); 3050 3051 MachineFunction &MF = DAG.getMachineFunction(); 3052 3053 // Analyze the operands of the call, assigning locations to each operand. 3054 SmallVector<CCValAssign, 16> ArgLocs; 3055 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 3056 3057 if (CallConv == CallingConv::Fast) 3058 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 3059 else if (CallConv == CallingConv::GHC) 3060 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 3061 else 3062 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 3063 3064 // Check if it's really possible to do a tail call. 3065 if (IsTailCall) 3066 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 3067 3068 if (IsTailCall) 3069 ++NumTailCalls; 3070 else if (CLI.CB && CLI.CB->isMustTailCall()) 3071 report_fatal_error("failed to perform tail call elimination on a call " 3072 "site marked musttail"); 3073 3074 // Get a count of how many bytes are to be pushed on the stack. 3075 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 3076 3077 // Create local copies for byval args 3078 SmallVector<SDValue, 8> ByValArgs; 3079 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3080 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3081 if (!Flags.isByVal()) 3082 continue; 3083 3084 SDValue Arg = OutVals[i]; 3085 unsigned Size = Flags.getByValSize(); 3086 Align Alignment = Flags.getNonZeroByValAlign(); 3087 3088 int FI = 3089 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 3090 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 3091 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 3092 3093 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 3094 /*IsVolatile=*/false, 3095 /*AlwaysInline=*/false, IsTailCall, 3096 MachinePointerInfo(), MachinePointerInfo()); 3097 ByValArgs.push_back(FIPtr); 3098 } 3099 3100 if (!IsTailCall) 3101 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 3102 3103 // Copy argument values to their designated locations. 3104 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 3105 SmallVector<SDValue, 8> MemOpChains; 3106 SDValue StackPtr; 3107 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 3108 CCValAssign &VA = ArgLocs[i]; 3109 SDValue ArgValue = OutVals[i]; 3110 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3111 3112 // Handle passing f64 on RV32D with a soft float ABI as a special case. 3113 bool IsF64OnRV32DSoftABI = 3114 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 3115 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 3116 SDValue SplitF64 = DAG.getNode( 3117 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 3118 SDValue Lo = SplitF64.getValue(0); 3119 SDValue Hi = SplitF64.getValue(1); 3120 3121 Register RegLo = VA.getLocReg(); 3122 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 3123 3124 if (RegLo == RISCV::X17) { 3125 // Second half of f64 is passed on the stack. 
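// For example (illustrative): under a soft-float RV32 ABI such as ilp32, in a
// call to void f(int, int, int, int, int, int, int, double) the seven ints
// occupy a0-a6, leaving only a7 for the double, so its low word is passed in
// a7 (X17) and its high word is stored at offset 0 of the outgoing argument
// area, which is exactly the store emitted below.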
3126 // Work out the address of the stack slot. 3127 if (!StackPtr.getNode()) 3128 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3129 // Emit the store. 3130 MemOpChains.push_back( 3131 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 3132 } else { 3133 // Second half of f64 is passed in another GPR. 3134 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3135 Register RegHigh = RegLo + 1; 3136 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 3137 } 3138 continue; 3139 } 3140 3141 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 3142 // as any other MemLoc. 3143 3144 // Promote the value if needed. 3145 // For now, only handle fully promoted and indirect arguments. 3146 if (VA.getLocInfo() == CCValAssign::Indirect) { 3147 // Store the argument in a stack slot and pass its address. 3148 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 3149 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 3150 MemOpChains.push_back( 3151 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 3152 MachinePointerInfo::getFixedStack(MF, FI))); 3153 // If the original argument was split (e.g. i128), we need 3154 // to store all parts of it here (and pass just one address). 3155 unsigned ArgIndex = Outs[i].OrigArgIndex; 3156 assert(Outs[i].PartOffset == 0); 3157 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 3158 SDValue PartValue = OutVals[i + 1]; 3159 unsigned PartOffset = Outs[i + 1].PartOffset; 3160 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 3161 DAG.getIntPtrConstant(PartOffset, DL)); 3162 MemOpChains.push_back( 3163 DAG.getStore(Chain, DL, PartValue, Address, 3164 MachinePointerInfo::getFixedStack(MF, FI))); 3165 ++i; 3166 } 3167 ArgValue = SpillSlot; 3168 } else { 3169 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 3170 } 3171 3172 // Use local copy if it is a byval arg. 3173 if (Flags.isByVal()) 3174 ArgValue = ByValArgs[j++]; 3175 3176 if (VA.isRegLoc()) { 3177 // Queue up the argument copies and emit them at the end. 3178 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 3179 } else { 3180 assert(VA.isMemLoc() && "Argument not register or memory"); 3181 assert(!IsTailCall && "Tail call not allowed if stack is used " 3182 "for passing parameters"); 3183 3184 // Work out the address of the stack slot. 3185 if (!StackPtr.getNode()) 3186 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 3187 SDValue Address = 3188 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 3189 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 3190 3191 // Emit the store. 3192 MemOpChains.push_back( 3193 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 3194 } 3195 } 3196 3197 // Join the stores, which are independent of one another. 3198 if (!MemOpChains.empty()) 3199 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 3200 3201 SDValue Glue; 3202 3203 // Build a sequence of copy-to-reg nodes, chained and glued together. 3204 for (auto &Reg : RegsToPass) { 3205 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 3206 Glue = Chain.getValue(1); 3207 } 3208 3209 // Validate that none of the argument registers have been marked as 3210 // reserved, if so report an error. Do the same for the return address if this 3211 // is not a tailcall. 
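// Registers typically end up user-reserved via the reserve-x<N> subtarget
// features (e.g. through a frontend flag such as -ffixed-x<N>); the checks
// here and below diagnose calls that would need one of those registers for
// argument passing or for the return address.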
3212 validateCCReservedRegs(RegsToPass, MF); 3213 if (!IsTailCall && 3214 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 3215 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3216 MF.getFunction(), 3217 "Return address register required, but has been reserved."}); 3218 3219 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 3220 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 3221 // split it and then direct call can be matched by PseudoCALL. 3222 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 3223 const GlobalValue *GV = S->getGlobal(); 3224 3225 unsigned OpFlags = RISCVII::MO_CALL; 3226 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 3227 OpFlags = RISCVII::MO_PLT; 3228 3229 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 3230 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3231 unsigned OpFlags = RISCVII::MO_CALL; 3232 3233 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 3234 nullptr)) 3235 OpFlags = RISCVII::MO_PLT; 3236 3237 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 3238 } 3239 3240 // The first call operand is the chain and the second is the target address. 3241 SmallVector<SDValue, 8> Ops; 3242 Ops.push_back(Chain); 3243 Ops.push_back(Callee); 3244 3245 // Add argument registers to the end of the list so that they are 3246 // known live into the call. 3247 for (auto &Reg : RegsToPass) 3248 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 3249 3250 if (!IsTailCall) { 3251 // Add a register mask operand representing the call-preserved registers. 3252 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3253 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 3254 assert(Mask && "Missing call preserved mask for calling convention"); 3255 Ops.push_back(DAG.getRegisterMask(Mask)); 3256 } 3257 3258 // Glue the call to the argument copies, if any. 3259 if (Glue.getNode()) 3260 Ops.push_back(Glue); 3261 3262 // Emit the call. 3263 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3264 3265 if (IsTailCall) { 3266 MF.getFrameInfo().setHasTailCall(); 3267 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 3268 } 3269 3270 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 3271 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 3272 Glue = Chain.getValue(1); 3273 3274 // Mark the end of the call, which is glued to the call itself. 3275 Chain = DAG.getCALLSEQ_END(Chain, 3276 DAG.getConstant(NumBytes, DL, PtrVT, true), 3277 DAG.getConstant(0, DL, PtrVT, true), 3278 Glue, DL); 3279 Glue = Chain.getValue(1); 3280 3281 // Assign locations to each value returned by this call. 3282 SmallVector<CCValAssign, 16> RVLocs; 3283 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 3284 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 3285 3286 // Copy all of the result registers out of their specified physreg. 
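// For illustration: an i32 result is returned in a0, and under a soft-float
// RV32 ABI an f64 result comes back split across a0/a1 and is reassembled
// with RISCVISD::BuildPairF64 in the loop below.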
3287 for (auto &VA : RVLocs) { 3288 // Copy the value out 3289 SDValue RetValue = 3290 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 3291 // Glue the RetValue to the end of the call sequence 3292 Chain = RetValue.getValue(1); 3293 Glue = RetValue.getValue(2); 3294 3295 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3296 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 3297 SDValue RetValue2 = 3298 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 3299 Chain = RetValue2.getValue(1); 3300 Glue = RetValue2.getValue(2); 3301 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 3302 RetValue2); 3303 } 3304 3305 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 3306 3307 InVals.push_back(RetValue); 3308 } 3309 3310 return Chain; 3311 } 3312 3313 bool RISCVTargetLowering::CanLowerReturn( 3314 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 3315 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 3316 SmallVector<CCValAssign, 16> RVLocs; 3317 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 3318 3319 Optional<unsigned> FirstMaskArgument; 3320 if (Subtarget.hasStdExtV()) 3321 FirstMaskArgument = preAssignMask(Outs); 3322 3323 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 3324 MVT VT = Outs[i].VT; 3325 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3326 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 3327 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 3328 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 3329 *this, FirstMaskArgument)) 3330 return false; 3331 } 3332 return true; 3333 } 3334 3335 SDValue 3336 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 3337 bool IsVarArg, 3338 const SmallVectorImpl<ISD::OutputArg> &Outs, 3339 const SmallVectorImpl<SDValue> &OutVals, 3340 const SDLoc &DL, SelectionDAG &DAG) const { 3341 const MachineFunction &MF = DAG.getMachineFunction(); 3342 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 3343 3344 // Stores the assignment of the return value to a location. 3345 SmallVector<CCValAssign, 16> RVLocs; 3346 3347 // Info about the registers and stack slot. 3348 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 3349 *DAG.getContext()); 3350 3351 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 3352 nullptr); 3353 3354 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 3355 report_fatal_error("GHC functions return void only"); 3356 3357 SDValue Glue; 3358 SmallVector<SDValue, 4> RetOps(1, Chain); 3359 3360 // Copy the result values into the output registers. 3361 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 3362 SDValue Val = OutVals[i]; 3363 CCValAssign &VA = RVLocs[i]; 3364 assert(VA.isRegLoc() && "Can only return in registers!"); 3365 3366 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 3367 // Handle returning f64 on RV32D with a soft float ABI. 
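// For illustration: returning 1.0 from "double f(void)" under ilp32 splits
// the bit pattern 0x3FF0000000000000 so that a0 receives the low word
// 0x00000000 and a1 receives the high word 0x3FF00000.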
3368 assert(VA.isRegLoc() && "Expected return via registers"); 3369 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 3370 DAG.getVTList(MVT::i32, MVT::i32), Val); 3371 SDValue Lo = SplitF64.getValue(0); 3372 SDValue Hi = SplitF64.getValue(1); 3373 Register RegLo = VA.getLocReg(); 3374 assert(RegLo < RISCV::X31 && "Invalid register pair"); 3375 Register RegHi = RegLo + 1; 3376 3377 if (STI.isRegisterReservedByUser(RegLo) || 3378 STI.isRegisterReservedByUser(RegHi)) 3379 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3380 MF.getFunction(), 3381 "Return value register required, but has been reserved."}); 3382 3383 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 3384 Glue = Chain.getValue(1); 3385 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 3386 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 3387 Glue = Chain.getValue(1); 3388 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 3389 } else { 3390 // Handle a 'normal' return. 3391 Val = convertValVTToLocVT(DAG, Val, VA, DL); 3392 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 3393 3394 if (STI.isRegisterReservedByUser(VA.getLocReg())) 3395 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3396 MF.getFunction(), 3397 "Return value register required, but has been reserved."}); 3398 3399 // Guarantee that all emitted copies are stuck together. 3400 Glue = Chain.getValue(1); 3401 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 3402 } 3403 } 3404 3405 RetOps[0] = Chain; // Update chain. 3406 3407 // Add the glue node if we have it. 3408 if (Glue.getNode()) { 3409 RetOps.push_back(Glue); 3410 } 3411 3412 // Interrupt service routines use different return instructions. 3413 const Function &Func = DAG.getMachineFunction().getFunction(); 3414 if (Func.hasFnAttribute("interrupt")) { 3415 if (!Func.getReturnType()->isVoidTy()) 3416 report_fatal_error( 3417 "Functions with the interrupt attribute must have void return type!"); 3418 3419 MachineFunction &MF = DAG.getMachineFunction(); 3420 StringRef Kind = 3421 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 3422 3423 unsigned RetOpc; 3424 if (Kind == "user") 3425 RetOpc = RISCVISD::URET_FLAG; 3426 else if (Kind == "supervisor") 3427 RetOpc = RISCVISD::SRET_FLAG; 3428 else 3429 RetOpc = RISCVISD::MRET_FLAG; 3430 3431 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 3432 } 3433 3434 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 3435 } 3436 3437 void RISCVTargetLowering::validateCCReservedRegs( 3438 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 3439 MachineFunction &MF) const { 3440 const Function &F = MF.getFunction(); 3441 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 3442 3443 if (llvm::any_of(Regs, [&STI](auto Reg) { 3444 return STI.isRegisterReservedByUser(Reg.first); 3445 })) 3446 F.getContext().diagnose(DiagnosticInfoUnsupported{ 3447 F, "Argument register required, but has been reserved."}); 3448 } 3449 3450 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 3451 return CI->isTailCall(); 3452 } 3453 3454 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 3455 #define NODE_NAME_CASE(NODE) \ 3456 case RISCVISD::NODE: \ 3457 return "RISCVISD::" #NODE; 3458 // clang-format off 3459 switch ((RISCVISD::NodeType)Opcode) { 3460 case RISCVISD::FIRST_NUMBER: 3461 break; 3462 NODE_NAME_CASE(RET_FLAG) 3463 NODE_NAME_CASE(URET_FLAG) 3464 NODE_NAME_CASE(SRET_FLAG) 3465 
NODE_NAME_CASE(MRET_FLAG) 3466 NODE_NAME_CASE(CALL) 3467 NODE_NAME_CASE(SELECT_CC) 3468 NODE_NAME_CASE(BuildPairF64) 3469 NODE_NAME_CASE(SplitF64) 3470 NODE_NAME_CASE(TAIL) 3471 NODE_NAME_CASE(SLLW) 3472 NODE_NAME_CASE(SRAW) 3473 NODE_NAME_CASE(SRLW) 3474 NODE_NAME_CASE(DIVW) 3475 NODE_NAME_CASE(DIVUW) 3476 NODE_NAME_CASE(REMUW) 3477 NODE_NAME_CASE(ROLW) 3478 NODE_NAME_CASE(RORW) 3479 NODE_NAME_CASE(FSLW) 3480 NODE_NAME_CASE(FSRW) 3481 NODE_NAME_CASE(FMV_H_X) 3482 NODE_NAME_CASE(FMV_X_ANYEXTH) 3483 NODE_NAME_CASE(FMV_W_X_RV64) 3484 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 3485 NODE_NAME_CASE(READ_CYCLE_WIDE) 3486 NODE_NAME_CASE(GREVI) 3487 NODE_NAME_CASE(GREVIW) 3488 NODE_NAME_CASE(GORCI) 3489 NODE_NAME_CASE(GORCIW) 3490 NODE_NAME_CASE(VMV_X_S) 3491 NODE_NAME_CASE(SPLAT_VECTOR_I64) 3492 } 3493 // clang-format on 3494 return nullptr; 3495 #undef NODE_NAME_CASE 3496 } 3497 3498 /// getConstraintType - Given a constraint letter, return the type of 3499 /// constraint it is for this target. 3500 RISCVTargetLowering::ConstraintType 3501 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 3502 if (Constraint.size() == 1) { 3503 switch (Constraint[0]) { 3504 default: 3505 break; 3506 case 'f': 3507 return C_RegisterClass; 3508 case 'I': 3509 case 'J': 3510 case 'K': 3511 return C_Immediate; 3512 case 'A': 3513 return C_Memory; 3514 } 3515 } 3516 return TargetLowering::getConstraintType(Constraint); 3517 } 3518 3519 std::pair<unsigned, const TargetRegisterClass *> 3520 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 3521 StringRef Constraint, 3522 MVT VT) const { 3523 // First, see if this is a constraint that directly corresponds to a 3524 // RISCV register class. 3525 if (Constraint.size() == 1) { 3526 switch (Constraint[0]) { 3527 case 'r': 3528 return std::make_pair(0U, &RISCV::GPRRegClass); 3529 case 'f': 3530 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 3531 return std::make_pair(0U, &RISCV::FPR16RegClass); 3532 if (Subtarget.hasStdExtF() && VT == MVT::f32) 3533 return std::make_pair(0U, &RISCV::FPR32RegClass); 3534 if (Subtarget.hasStdExtD() && VT == MVT::f64) 3535 return std::make_pair(0U, &RISCV::FPR64RegClass); 3536 break; 3537 default: 3538 break; 3539 } 3540 } 3541 3542 // Clang will correctly decode the usage of register name aliases into their 3543 // official names. However, other frontends like `rustc` do not. This allows 3544 // users of these frontends to use the ABI names for registers in LLVM-style 3545 // register constraints. 
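// For example, a constraint string such as "{a0}" (as emitted by rustc's
// inline assembly) resolves to RISCV::X10 via the table below.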
3546 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 3547 .Case("{zero}", RISCV::X0) 3548 .Case("{ra}", RISCV::X1) 3549 .Case("{sp}", RISCV::X2) 3550 .Case("{gp}", RISCV::X3) 3551 .Case("{tp}", RISCV::X4) 3552 .Case("{t0}", RISCV::X5) 3553 .Case("{t1}", RISCV::X6) 3554 .Case("{t2}", RISCV::X7) 3555 .Cases("{s0}", "{fp}", RISCV::X8) 3556 .Case("{s1}", RISCV::X9) 3557 .Case("{a0}", RISCV::X10) 3558 .Case("{a1}", RISCV::X11) 3559 .Case("{a2}", RISCV::X12) 3560 .Case("{a3}", RISCV::X13) 3561 .Case("{a4}", RISCV::X14) 3562 .Case("{a5}", RISCV::X15) 3563 .Case("{a6}", RISCV::X16) 3564 .Case("{a7}", RISCV::X17) 3565 .Case("{s2}", RISCV::X18) 3566 .Case("{s3}", RISCV::X19) 3567 .Case("{s4}", RISCV::X20) 3568 .Case("{s5}", RISCV::X21) 3569 .Case("{s6}", RISCV::X22) 3570 .Case("{s7}", RISCV::X23) 3571 .Case("{s8}", RISCV::X24) 3572 .Case("{s9}", RISCV::X25) 3573 .Case("{s10}", RISCV::X26) 3574 .Case("{s11}", RISCV::X27) 3575 .Case("{t3}", RISCV::X28) 3576 .Case("{t4}", RISCV::X29) 3577 .Case("{t5}", RISCV::X30) 3578 .Case("{t6}", RISCV::X31) 3579 .Default(RISCV::NoRegister); 3580 if (XRegFromAlias != RISCV::NoRegister) 3581 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 3582 3583 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 3584 // TableGen record rather than the AsmName to choose registers for InlineAsm 3585 // constraints, plus we want to match those names to the widest floating point 3586 // register type available, manually select floating point registers here. 3587 // 3588 // The second case is the ABI name of the register, so that frontends can also 3589 // use the ABI names in register constraint lists. 3590 if (Subtarget.hasStdExtF()) { 3591 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 3592 .Cases("{f0}", "{ft0}", RISCV::F0_F) 3593 .Cases("{f1}", "{ft1}", RISCV::F1_F) 3594 .Cases("{f2}", "{ft2}", RISCV::F2_F) 3595 .Cases("{f3}", "{ft3}", RISCV::F3_F) 3596 .Cases("{f4}", "{ft4}", RISCV::F4_F) 3597 .Cases("{f5}", "{ft5}", RISCV::F5_F) 3598 .Cases("{f6}", "{ft6}", RISCV::F6_F) 3599 .Cases("{f7}", "{ft7}", RISCV::F7_F) 3600 .Cases("{f8}", "{fs0}", RISCV::F8_F) 3601 .Cases("{f9}", "{fs1}", RISCV::F9_F) 3602 .Cases("{f10}", "{fa0}", RISCV::F10_F) 3603 .Cases("{f11}", "{fa1}", RISCV::F11_F) 3604 .Cases("{f12}", "{fa2}", RISCV::F12_F) 3605 .Cases("{f13}", "{fa3}", RISCV::F13_F) 3606 .Cases("{f14}", "{fa4}", RISCV::F14_F) 3607 .Cases("{f15}", "{fa5}", RISCV::F15_F) 3608 .Cases("{f16}", "{fa6}", RISCV::F16_F) 3609 .Cases("{f17}", "{fa7}", RISCV::F17_F) 3610 .Cases("{f18}", "{fs2}", RISCV::F18_F) 3611 .Cases("{f19}", "{fs3}", RISCV::F19_F) 3612 .Cases("{f20}", "{fs4}", RISCV::F20_F) 3613 .Cases("{f21}", "{fs5}", RISCV::F21_F) 3614 .Cases("{f22}", "{fs6}", RISCV::F22_F) 3615 .Cases("{f23}", "{fs7}", RISCV::F23_F) 3616 .Cases("{f24}", "{fs8}", RISCV::F24_F) 3617 .Cases("{f25}", "{fs9}", RISCV::F25_F) 3618 .Cases("{f26}", "{fs10}", RISCV::F26_F) 3619 .Cases("{f27}", "{fs11}", RISCV::F27_F) 3620 .Cases("{f28}", "{ft8}", RISCV::F28_F) 3621 .Cases("{f29}", "{ft9}", RISCV::F29_F) 3622 .Cases("{f30}", "{ft10}", RISCV::F30_F) 3623 .Cases("{f31}", "{ft11}", RISCV::F31_F) 3624 .Default(RISCV::NoRegister); 3625 if (FReg != RISCV::NoRegister) { 3626 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 3627 if (Subtarget.hasStdExtD()) { 3628 unsigned RegNo = FReg - RISCV::F0_F; 3629 unsigned DReg = RISCV::F0_D + RegNo; 3630 return std::make_pair(DReg, &RISCV::FPR64RegClass); 3631 } 3632 return std::make_pair(FReg, 
&RISCV::FPR32RegClass); 3633 } 3634 } 3635 3636 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 3637 } 3638 3639 unsigned 3640 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 3641 // Currently only support length 1 constraints. 3642 if (ConstraintCode.size() == 1) { 3643 switch (ConstraintCode[0]) { 3644 case 'A': 3645 return InlineAsm::Constraint_A; 3646 default: 3647 break; 3648 } 3649 } 3650 3651 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 3652 } 3653 3654 void RISCVTargetLowering::LowerAsmOperandForConstraint( 3655 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 3656 SelectionDAG &DAG) const { 3657 // Currently only support length 1 constraints. 3658 if (Constraint.length() == 1) { 3659 switch (Constraint[0]) { 3660 case 'I': 3661 // Validate & create a 12-bit signed immediate operand. 3662 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3663 uint64_t CVal = C->getSExtValue(); 3664 if (isInt<12>(CVal)) 3665 Ops.push_back( 3666 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 3667 } 3668 return; 3669 case 'J': 3670 // Validate & create an integer zero operand. 3671 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 3672 if (C->getZExtValue() == 0) 3673 Ops.push_back( 3674 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 3675 return; 3676 case 'K': 3677 // Validate & create a 5-bit unsigned immediate operand. 3678 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3679 uint64_t CVal = C->getZExtValue(); 3680 if (isUInt<5>(CVal)) 3681 Ops.push_back( 3682 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 3683 } 3684 return; 3685 default: 3686 break; 3687 } 3688 } 3689 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 3690 } 3691 3692 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 3693 Instruction *Inst, 3694 AtomicOrdering Ord) const { 3695 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 3696 return Builder.CreateFence(Ord); 3697 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 3698 return Builder.CreateFence(AtomicOrdering::Release); 3699 return nullptr; 3700 } 3701 3702 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 3703 Instruction *Inst, 3704 AtomicOrdering Ord) const { 3705 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 3706 return Builder.CreateFence(AtomicOrdering::Acquire); 3707 return nullptr; 3708 } 3709 3710 TargetLowering::AtomicExpansionKind 3711 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 3712 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 3713 // point operations can't be used in an lr/sc sequence without breaking the 3714 // forward-progress guarantee. 
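// For illustration, an operation such as
//   %old = atomicrmw fadd float* %p, float 1.000000e+00 seq_cst
// is therefore expanded to a compare-exchange loop by AtomicExpand, while the
// i8/i16 cases below are lowered to the masked atomic intrinsics, which
// operate on the aligned word containing the byte or halfword.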
3715 if (AI->isFloatingPointOperation()) 3716 return AtomicExpansionKind::CmpXChg; 3717 3718 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 3719 if (Size == 8 || Size == 16) 3720 return AtomicExpansionKind::MaskedIntrinsic; 3721 return AtomicExpansionKind::None; 3722 } 3723 3724 static Intrinsic::ID 3725 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 3726 if (XLen == 32) { 3727 switch (BinOp) { 3728 default: 3729 llvm_unreachable("Unexpected AtomicRMW BinOp"); 3730 case AtomicRMWInst::Xchg: 3731 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 3732 case AtomicRMWInst::Add: 3733 return Intrinsic::riscv_masked_atomicrmw_add_i32; 3734 case AtomicRMWInst::Sub: 3735 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 3736 case AtomicRMWInst::Nand: 3737 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 3738 case AtomicRMWInst::Max: 3739 return Intrinsic::riscv_masked_atomicrmw_max_i32; 3740 case AtomicRMWInst::Min: 3741 return Intrinsic::riscv_masked_atomicrmw_min_i32; 3742 case AtomicRMWInst::UMax: 3743 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 3744 case AtomicRMWInst::UMin: 3745 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 3746 } 3747 } 3748 3749 if (XLen == 64) { 3750 switch (BinOp) { 3751 default: 3752 llvm_unreachable("Unexpected AtomicRMW BinOp"); 3753 case AtomicRMWInst::Xchg: 3754 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 3755 case AtomicRMWInst::Add: 3756 return Intrinsic::riscv_masked_atomicrmw_add_i64; 3757 case AtomicRMWInst::Sub: 3758 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 3759 case AtomicRMWInst::Nand: 3760 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 3761 case AtomicRMWInst::Max: 3762 return Intrinsic::riscv_masked_atomicrmw_max_i64; 3763 case AtomicRMWInst::Min: 3764 return Intrinsic::riscv_masked_atomicrmw_min_i64; 3765 case AtomicRMWInst::UMax: 3766 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 3767 case AtomicRMWInst::UMin: 3768 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 3769 } 3770 } 3771 3772 llvm_unreachable("Unexpected XLen\n"); 3773 } 3774 3775 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 3776 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 3777 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 3778 unsigned XLen = Subtarget.getXLen(); 3779 Value *Ordering = 3780 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 3781 Type *Tys[] = {AlignedAddr->getType()}; 3782 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 3783 AI->getModule(), 3784 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 3785 3786 if (XLen == 64) { 3787 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 3788 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 3789 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 3790 } 3791 3792 Value *Result; 3793 3794 // Must pass the shift amount needed to sign extend the loaded value prior 3795 // to performing a signed comparison for min/max. ShiftAmt is the number of 3796 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 3797 // is the number of bits to left+right shift the value in order to 3798 // sign-extend. 
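// For illustration: on RV32, an i8 field at byte offset 1 of its aligned
// word has ShiftAmt = 8 and ValWidth = 8, giving SextShamt = 32 - 8 - 8 = 16;
// the expansion can then sign-extend the loaded field in place with a
// shift-left/shift-right-arithmetic pair by that amount.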
3799 if (AI->getOperation() == AtomicRMWInst::Min || 3800 AI->getOperation() == AtomicRMWInst::Max) { 3801 const DataLayout &DL = AI->getModule()->getDataLayout(); 3802 unsigned ValWidth = 3803 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 3804 Value *SextShamt = 3805 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 3806 Result = Builder.CreateCall(LrwOpScwLoop, 3807 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 3808 } else { 3809 Result = 3810 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 3811 } 3812 3813 if (XLen == 64) 3814 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 3815 return Result; 3816 } 3817 3818 TargetLowering::AtomicExpansionKind 3819 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 3820 AtomicCmpXchgInst *CI) const { 3821 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 3822 if (Size == 8 || Size == 16) 3823 return AtomicExpansionKind::MaskedIntrinsic; 3824 return AtomicExpansionKind::None; 3825 } 3826 3827 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 3828 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 3829 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 3830 unsigned XLen = Subtarget.getXLen(); 3831 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 3832 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 3833 if (XLen == 64) { 3834 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 3835 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 3836 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 3837 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 3838 } 3839 Type *Tys[] = {AlignedAddr->getType()}; 3840 Function *MaskedCmpXchg = 3841 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 3842 Value *Result = Builder.CreateCall( 3843 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 3844 if (XLen == 64) 3845 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 3846 return Result; 3847 } 3848 3849 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 3850 EVT VT) const { 3851 VT = VT.getScalarType(); 3852 3853 if (!VT.isSimple()) 3854 return false; 3855 3856 switch (VT.getSimpleVT().SimpleTy) { 3857 case MVT::f16: 3858 return Subtarget.hasStdExtZfh(); 3859 case MVT::f32: 3860 return Subtarget.hasStdExtF(); 3861 case MVT::f64: 3862 return Subtarget.hasStdExtD(); 3863 default: 3864 break; 3865 } 3866 3867 return false; 3868 } 3869 3870 Register RISCVTargetLowering::getExceptionPointerRegister( 3871 const Constant *PersonalityFn) const { 3872 return RISCV::X10; 3873 } 3874 3875 Register RISCVTargetLowering::getExceptionSelectorRegister( 3876 const Constant *PersonalityFn) const { 3877 return RISCV::X11; 3878 } 3879 3880 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 3881 // Return false to suppress the unnecessary extensions if the LibCall 3882 // arguments or return value is f32 type for LP64 ABI. 3883 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 3884 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 3885 return false; 3886 3887 return true; 3888 } 3889 3890 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 3891 SDValue C) const { 3892 // Check integral scalar types. 3893 if (VT.isScalarInteger()) { 3894 // Do not perform the transformation on riscv32 with the M extension. 
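// Returning true lets a multiply by an immediate such as 9 be rewritten as a
// shift plus an add (x * 9 == (x << 3) + x); on riscv32 with the M extension
// the single MUL is kept instead.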
3895 if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) 3896 return false; 3897 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 3898 if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) 3899 return false; 3900 int64_t Imm = ConstNode->getSExtValue(); 3901 if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) || 3902 isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm)) 3903 return true; 3904 } 3905 } 3906 3907 return false; 3908 } 3909 3910 #define GET_REGISTER_MATCHER 3911 #include "RISCVGenAsmMatcher.inc" 3912 3913 Register 3914 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 3915 const MachineFunction &MF) const { 3916 Register Reg = MatchRegisterAltName(RegName); 3917 if (Reg == RISCV::NoRegister) 3918 Reg = MatchRegisterName(RegName); 3919 if (Reg == RISCV::NoRegister) 3920 report_fatal_error( 3921 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 3922 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 3923 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 3924 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 3925 StringRef(RegName) + "\".")); 3926 return Reg; 3927 } 3928 3929 namespace llvm { 3930 namespace RISCVVIntrinsicsTable { 3931 3932 #define GET_RISCVVIntrinsicsTable_IMPL 3933 #include "RISCVGenSearchableTables.inc" 3934 3935 } // namespace RISCVVIntrinsicsTable 3936 } // namespace llvm 3937