//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

// Constructor: validates the requested ABI against the available ISA
// extensions, registers the legal register classes, and then declares how
// every generic SelectionDAG operation should be legalized for this subtarget
// (Legal / Expand / Custom / Promote).
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  // If a hard-float ABI was requested but the required FP extension is
  // missing, warn and fall back to the corresponding soft-float ABI rather
  // than failing outright.
  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  // Reject any ABI not explicitly supported below.
  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  // i1 extending loads are promoted (there is no sub-byte load).
  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Varargs: VASTART is custom-lowered (see lowerVASTART); the rest expand.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  // On RV64, i32 is not a legal type; these i32 operations are custom
  // legalized (via ReplaceNodeResults) so the 32-bit origin isn't lost.
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  // Without the M extension there is no hardware multiply/divide.
  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  // Double-width shifts are custom lowered (see lowerShiftLeftParts /
  // lowerShiftRightParts).
  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  // Rotates/bswap/count ops are only cheap with the relevant bitmanip
  // sub-extensions; otherwise expand.
  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (!Subtarget.hasStdExtZbp())
    setOperationAction(ISD::BSWAP, XLenVT, Expand);

  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp())
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
  }

  // FP condition codes and FP operations with no direct instruction support;
  // expanded for both f32 and f64 below.
  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  // i32 <-> f32 bitcasts on RV64 need custom handling (see LowerOperation's
  // ISD::BITCAST case).
  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  // Symbolic addresses are custom lowered (see getAddr and friends).
  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Atomics: with the A extension, native support up to XLEN bits; without
  // it, everything is lowered to __atomic_* libcalls.
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}

// SetCC results live in a GPR (pointer-sized integer) for scalars; vector
// compares produce a vector with integer elements of matching width.
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

// Describe the memory behaviour of the masked atomic intrinsics so that the
// SelectionDAG builder can attach an accurate MachineMemOperand to them.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    // All of these intrinsics both read and write memory through their first
    // (pointer) argument and are treated as volatile.
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

// RISC-V loads/stores address as reg + simm12 only; reject anything richer.
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
                                               unsigned AS,
                                               Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

// Compare immediates must fit the 12-bit signed immediate of SLTI/SLTIU.
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// Add immediates must fit the 12-bit signed immediate of ADDI.
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

// EVT overload of the above: an i64 -> i32 truncate is free on RV32 only.
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Non-extending and zero-extending loads of i8/i16 (and i32 on RV64)
  // already produce a zero-extended value.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

// On RV64, prefer sign- over zero-extension when widening i32 to i64.
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

// Speculating cttz/ctlz is only worthwhile when Zbb provides them directly.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

// Only +0.0 is treated as a legal FP immediate (and only when the matching
// FP extension is available); -0.0 and everything else must be materialised.
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    // GT/LE (and unsigned variants) become LT/GE with swapped operands.
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

// Dispatch point for every operation the constructor marked Custom.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    // RV64 + F only: lower an i32 -> f32 bitcast by any-extending to i64 and
    // emitting FMV_W_X_RV64. Other bitcasts fall back to default handling.
    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
      return SDValue();
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
    return FPConv;
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

// getTargetNode overloads: build the target-flavoured node for each kind of
// symbolic address, carrying the given target flags (MO_HI/MO_LO/...).
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

// Materialise the address of a global/blockaddress/constant-pool symbol,
// choosing the instruction sequence from the relocation model and code model.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

// Lower a TLS address for the static models: local-exec (no GOT) or
// initial-exec (address loaded from the GOT, then tp added).
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer (x4/tp).
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

// Lower a TLS address for the dynamic models by calling __tls_get_addr on the
// GOT entry for the symbol.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // Walk up Depth frames; each caller's frame pointer is stored two XLEN
  // slots below the current frame pointer.
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    // The return address of an outer frame is stored one XLEN slot below
    // that frame's frame pointer.
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  // The two-step right shift of Lo avoids an out-of-range shift amount when
  // Shamt == 0 (XLEN-1 - 0 followed by the extra >>u 1 totals XLEN).
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  // As in lowerShiftLeftParts, split Hi's left shift in two so the combined
  // amount never reaches XLEN when Shamt == 0.
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in x4 (tp).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  }
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case RISCVISD::GREVI:
    return RISCVISD::GREVIW;
  case RISCVISD::GORCI:
    return RISCVISD::GORCIW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later one because the fact the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // ANY_EXTEND is sufficient: the *W node only reads the low 32 bits of each
  // operand.
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Converts the given 32-bit operation to an i64 operation with a signed
// extension semantic, to reduce the number of sign-extension instructions:
// the SIGN_EXTEND_INREG records that the i64 result is already properly
// sign-extended from bit 31.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Custom type legalization: replace a node whose result type is illegal
// (typically i32 on RV64, or i64 READCYCLECOUNTER on RV32) with nodes of
// legal types. Results must be pushed in the same order and with the same
// types as the original node's results.
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // For strict nodes operand 0 is the chain; the FP value follows it.
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    // READ_CYCLE_WIDE is expanded later into the rdcycle/rdcycleh retry loop
    // (see emitReadCycleWidePseudo); pair its two i32 halves into the i64
    // result expected by callers.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant RHS is left to generic legalization, which can fold it.
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    // Move the f32 bits into a GPR with FMV.X; the upper 32 bits of the i64
    // result are unspecified (anyext), so truncate back to i32.
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  case RISCVISD::GREVI:
  case RISCVISD::GORCI: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // This is similar to customLegalizeToWOp, except that we pass the second
    // operand (a TargetConstant) straight through: it is already of type
    // XLenVT.
    SDLoc DL(N);
    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewRes =
        DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  }
}

// A structure to hold one of the bit-manipulation patterns below.
// Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
  SDValue Op;      // The common source value being permuted.
  unsigned ShAmt;  // Power-of-two shift amount (1, 2, 4, 8, 16 or 32).
  bool IsSHL;      // True if this half came from a SHL, false for SRL.

  // Two patterns pair up when they permute the same source by the same
  // amount in opposite directions.
  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x55555555 << 1))
//   (and (srl x, 1), 0x55555555)
//   (shl (and x, 0x55555555), 1)
//   (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x55555555 / 0xAAAAAAAA
//   [2]  = 0x33333333 / 0xCCCCCCCC
//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
//   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  auto ShAmt = Op.getConstantOperandVal(1);

  if (!isPowerOf2_64(ShAmt))
    return None;

  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
  };

  // Log2(ShAmt) doubles as the index into the mask table (shift 1 -> mask 0,
  // shift 2 -> mask 1, ... shift 32 -> mask 5).
  unsigned MaskIdx = Log2_64(ShAmt);
  if (MaskIdx >= array_lengthof(BitmanipMasks))
    return None;

  auto Src = Op.getOperand(0);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}

// Match the following pattern as a GREVI(W) operation
//   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  // Only handle XLenVT, or i32 on RV64 (which legalizes to GREVIW).
  if (Op.getSimpleValueType() == Subtarget.getXLenVT() ||
      (Subtarget.is64Bit() && Op.getSimpleValueType() == MVT::i32)) {
    auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
    auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
    if (LHS && RHS && LHS->formsPairWith(*RHS)) {
      SDLoc DL(Op);
      return DAG.getNode(
          RISCVISD::GREVI, DL, Op.getValueType(), LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x)
// 2.  (or x, (GREVI x, shamt))
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  if (Op.getSimpleValueType() == Subtarget.getXLenVT() ||
      (Subtarget.is64Bit() && Op.getSimpleValueType() == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    for (const auto &OpPair :
         {std::make_pair(Op0, Op1), std::make_pair(Op1, Op0)}) {
      if (OpPair.first.getOpcode() == RISCVISD::GREVI &&
          OpPair.first.getOperand(0) == OpPair.second)
        return DAG.getNode(RISCVISD::GORCI, DL, Op.getValueType(),
                           OpPair.second, OpPair.first.getOperand(1));
    }

    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchRISCVBitmanipPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchRISCVBitmanipPat(OrOp0);
    }
    auto RHS = matchRISCVBitmanipPat(Op1);
    // The remaining inner-OR operand must be the unpermuted source itself for
    // the whole tree to reduce to a single GORCI.
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(
          RISCVISD::GORCI, DL, Op.getValueType(), LHS->Op,
          DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
    }
  }
  return SDValue();
}

// Target-specific DAG combines: fold away redundant SplitF64/BuildPairF64 and
// FMV round trips, narrow demanded bits on the *W nodes, and recognize
// GREVI/GORCI patterns on OR.
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    // Only the high half holds the sign bit of the original f64.
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      // SimplifyDemandedBits may have replaced an operand; revisit this node
      // unless it was deleted in the process.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    // Sign-extend the 32-bit sign mask so the AND below also clears the
    // (unspecified) upper bits, matching FABS semantics on the low word.
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case ISD::OR:
    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
      return GORC;
    break;
  }

  return SDValue();
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      APInt C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  // Default: allow the commute.
  return true;
}

// The *W nodes sign-extend their 32-bit result to 64 bits, so at least the
// top 33 bits of the result are known equal to the sign bit.
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  }

  return 1;
}

static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the retry loop; DoneMBB receives everything after MI.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1=x0 is a plain CSR read (no bits set).
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the high word changed while the low word was being read.
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

// Expand SplitF64Pseudo: move an FPR64 value into two GPRs by spilling the
// double to a dedicated frame slot and reloading its two 32-bit words.
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, Align(8));
  // Offsets 0 and 4 assume a little-endian layout (low word first).
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Expand BuildPairF64Pseudo: assemble an FPR64 value from two GPRs by storing
// the two 32-bit words to a dedicated frame slot and reloading the double.
// This is the inverse of emitSplitF64Pseudo.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Returns true if MI is one of the Select_* pseudos expanded by
// emitSelectPseudo below.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

// Expand one or more consecutive Select_* pseudos into branch-based control
// flow. Select pseudo operand layout (from the uses below): 0 = dest vreg,
// 1 = LHS, 2 = RHS, 3 = condition code immediate, 4 = true value,
// 5 = false value.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI, growing the sequence of selects that can share this
  // branch; stop at the first instruction that is unsafe to sink past.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // New PHIs were introduced above, so the function no longer satisfies NoPHIs.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
llvm_unreachable("Unexpected instr type to insert"); 1672 case RISCV::ReadCycleWide: 1673 assert(!Subtarget.is64Bit() && 1674 "ReadCycleWrite is only to be used on riscv32"); 1675 return emitReadCycleWidePseudo(MI, BB); 1676 case RISCV::Select_GPR_Using_CC_GPR: 1677 case RISCV::Select_FPR32_Using_CC_GPR: 1678 case RISCV::Select_FPR64_Using_CC_GPR: 1679 return emitSelectPseudo(MI, BB); 1680 case RISCV::BuildPairF64Pseudo: 1681 return emitBuildPairF64Pseudo(MI, BB); 1682 case RISCV::SplitF64Pseudo: 1683 return emitSplitF64Pseudo(MI, BB); 1684 } 1685 } 1686 1687 // Calling Convention Implementation. 1688 // The expectations for frontend ABI lowering vary from target to target. 1689 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 1690 // details, but this is a longer term goal. For now, we simply try to keep the 1691 // role of the frontend as simple and well-defined as possible. The rules can 1692 // be summarised as: 1693 // * Never split up large scalar arguments. We handle them here. 1694 // * If a hardfloat calling convention is being used, and the struct may be 1695 // passed in a pair of registers (fp+fp, int+fp), and both registers are 1696 // available, then pass as two separate arguments. If either the GPRs or FPRs 1697 // are exhausted, then pass according to the rule below. 1698 // * If a struct could never be passed in registers or directly in a stack 1699 // slot (as it is larger than 2*XLEN and the floating point rules don't 1700 // apply), then pass it using a pointer with the byval attribute. 1701 // * If a struct is less than 2*XLEN, then coerce to either a two-element 1702 // word-sized array or a 2*XLEN scalar (depending on alignment). 1703 // * The frontend can determine whether a struct is returned by reference or 1704 // not based on its size and fields. If it will be returned by reference, the 1705 // frontend must modify the prototype so a pointer with the sret annotation is 1706 // passed as the first argument. 
// This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.

// Integer, f32 and f64 argument registers, in ABI allocation order
// (a0-a7 / fa0-fa7).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary. Always succeeds (returns false).
// (ArgFlags2 is currently unused.)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F32 argument registers are available.
  bool UseGPRForF32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // Fall back to GPRs if the FP argument registers are exhausted.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
    UseGPRForF32 = true;
  if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
    UseGPRForF64 = true;

  // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
  // variables rather than directly checking against the target ABI.

  if (UseGPRForF32 && ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs left: the whole f64 goes on the stack, 8-byte aligned.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // One GPR holds the low half; the high half takes the next GPR if
    // available, otherwise a 4-byte stack slot.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  if (ValVT == MVT::f32 && !UseGPRForF32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else
    Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All parts share the single location holding the value's address.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
         "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

// Assign locations to the incoming arguments (or, when IsRet is true, to the
// return values received from a callee) using CC_RISCV.
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    // CC_RISCV needs the original IR type for the variadic register-alignment
    // rule; it is only known for the return type or original arguments.
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

// Assign locations to the outgoing arguments (or, when IsRet is true, to the
// values being returned from this function) using CC_RISCV.
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    // The original IR type is only available when lowering a call (CLI set).
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // f32 carried in an i64 GPR on RV64 uses FMV_W_X rather than a bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
2006 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 2007 const CCValAssign &VA, const SDLoc &DL) { 2008 MachineFunction &MF = DAG.getMachineFunction(); 2009 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2010 EVT LocVT = VA.getLocVT(); 2011 SDValue Val; 2012 const TargetRegisterClass *RC; 2013 2014 switch (LocVT.getSimpleVT().SimpleTy) { 2015 default: 2016 llvm_unreachable("Unexpected register type"); 2017 case MVT::i32: 2018 case MVT::i64: 2019 RC = &RISCV::GPRRegClass; 2020 break; 2021 case MVT::f32: 2022 RC = &RISCV::FPR32RegClass; 2023 break; 2024 case MVT::f64: 2025 RC = &RISCV::FPR64RegClass; 2026 break; 2027 } 2028 2029 Register VReg = RegInfo.createVirtualRegister(RC); 2030 RegInfo.addLiveIn(VA.getLocReg(), VReg); 2031 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 2032 2033 if (VA.getLocInfo() == CCValAssign::Indirect) 2034 return Val; 2035 2036 return convertLocVTToValVT(DAG, Val, VA, DL); 2037 } 2038 2039 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 2040 const CCValAssign &VA, const SDLoc &DL) { 2041 EVT LocVT = VA.getLocVT(); 2042 2043 switch (VA.getLocInfo()) { 2044 default: 2045 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 2046 case CCValAssign::Full: 2047 break; 2048 case CCValAssign::BCvt: 2049 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 2050 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 2051 break; 2052 } 2053 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 2054 break; 2055 } 2056 return Val; 2057 } 2058 2059 // The caller is responsible for loading the full value if the argument is 2060 // passed with CCValAssign::Indirect. 
// Load an argument (or return value) that the calling convention assigned to
// a fixed stack slot in the caller's frame.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  // Create a fixed frame object covering the incoming stack slot.
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  // All LocInfos handled here load without extension.
  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

// Reassemble an f64 argument passed under an RV32 soft-float ABI: it may
// arrive in a pair of GPRs, split between a GPR and the stack, or fully on
// the stack.
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // X5 and X6 might be used for save-restore libcall.
    static const MCPhysReg GPRList[] = {
        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
        RISCV::X29, RISCV::X30, RISCV::X31};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No registers left: fall back to the stack.
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  return true; // CC didn't match.
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers have a restricted signature: no arguments, and only
    // the recognised interrupt kinds are accepted.
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      // Consume all subsequent parts belonging to the same original argument.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  // Convenience aliases into CLI and the machine function.
  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  // Unpack the call lowering state into locals.
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::Fast)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  // A musttail call must be lowered as a tail call; failing that is fatal.
  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    // Allocate a frame slot and memcpy the byval argument into it.
    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together so the
  // argument-register copies stay with the call.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();

    unsigned OpFlags = RISCVII::MO_CALL;
    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = RISCVII::MO_CALL;

    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
2627 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2628 2629 if (IsTailCall) { 2630 MF.getFrameInfo().setHasTailCall(); 2631 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 2632 } 2633 2634 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 2635 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2636 Glue = Chain.getValue(1); 2637 2638 // Mark the end of the call, which is glued to the call itself. 2639 Chain = DAG.getCALLSEQ_END(Chain, 2640 DAG.getConstant(NumBytes, DL, PtrVT, true), 2641 DAG.getConstant(0, DL, PtrVT, true), 2642 Glue, DL); 2643 Glue = Chain.getValue(1); 2644 2645 // Assign locations to each value returned by this call. 2646 SmallVector<CCValAssign, 16> RVLocs; 2647 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 2648 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 2649 2650 // Copy all of the result registers out of their specified physreg. 2651 for (auto &VA : RVLocs) { 2652 // Copy the value out 2653 SDValue RetValue = 2654 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 2655 // Glue the RetValue to the end of the call sequence 2656 Chain = RetValue.getValue(1); 2657 Glue = RetValue.getValue(2); 2658 2659 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 2660 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 2661 SDValue RetValue2 = 2662 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 2663 Chain = RetValue2.getValue(1); 2664 Glue = RetValue2.getValue(2); 2665 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 2666 RetValue2); 2667 } 2668 2669 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 2670 2671 InVals.push_back(RetValue); 2672 } 2673 2674 return Chain; 2675 } 2676 2677 bool RISCVTargetLowering::CanLowerReturn( 2678 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 2679 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 2680 SmallVector<CCValAssign, 16> RVLocs; 2681 
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  // CC_RISCV returns true on failure to assign a location.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

/// Lower the outgoing return values of a function: copy each value into its
/// assigned register(s) and emit the appropriate return node (RET_FLAG, or a
/// URET/SRET/MRET variant for interrupt handlers).
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      // The value is split into two i32 halves placed in the adjacent GPR
      // pair (RegLo, RegLo + 1).
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Pick the return opcode matching the interrupt privilege level; any
    // value other than "user"/"supervisor" falls back to machine mode.
    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

/// Emit a DiagnosticInfoUnsupported if any register in Regs has been marked
/// as reserved by the user, since it can then not be used to pass arguments.
void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

// A call is a candidate for tail-call emission only if the IR call
// instruction is marked 'tail'.
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Return the textual name of a RISCVISD opcode for debug output, or nullptr
// for unknown opcodes.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREVI)
  NODE_NAME_CASE(GREVIW)
  NODE_NAME_CASE(GORCI)
  NODE_NAME_CASE(GORCIW)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      // 'f' requires the relevant FP extension for the requested type.
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      // With D, report the matching 64-bit register so the constraint covers
      // the widest available FP type.
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

/// Lower the 'I', 'J' and 'K' immediate constraints to target constants when
/// the operand is a constant in the valid range; all other constraints are
/// deferred to the generic implementation.
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

/// Emit a fence before `Inst` where the atomic ordering requires one: a fence
/// with the original ordering before a seq_cst load, and a release fence
/// before a release-or-stronger store. Returns nullptr when no fence is
/// needed.
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

/// Emit an acquire fence after an acquire-or-stronger load; nullptr otherwise.
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  // Sub-word (8/16-bit) atomicrmw is expanded via the masked intrinsics.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

/// Map an atomicrmw binop to the corresponding riscv.masked.atomicrmw.*
/// intrinsic for the given XLen (32 or 64).
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 intrinsics take XLen-wide operands: sign-extend the i32 inputs on
  // RV64 (the result is truncated back to i32 below).
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Sub-word (8/16-bit) cmpxchg is expanded via the masked intrinsic.
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  // As above: the i64 intrinsic takes XLen-wide operands on RV64.
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

// Exception pointer lives in X10 (a0).
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

// Exception selector lives in X11 (a1).
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

/// Return true if a multiply by the constant C is profitably decomposed into
/// shifts and adds/subs, i.e. when C is within one of a power of two (so the
/// product is a single shift plus one add/sub of the operand).
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Do not perform the transformation on riscv32 with the M extension.
    if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Constants wider than 64 bits cannot be examined via getSExtValue().
      if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
        return false;
      int64_t Imm = ConstNode->getSExtValue();
      // +/-Imm within one of a power of two: shift + single add/sub.
      if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
          isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
        return true;
    }
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

/// Resolve a register name (ABI alias first, then architectural name) for
/// read/write_register-style access. Reports a fatal error for unknown names,
/// and for registers that are neither reserved by the target nor marked
/// reserved by the user, since only reserved registers may be accessed this
/// way.
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}