1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/CodeGen/CallingConvLower.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/SelectionDAGISel.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 using namespace llvm; 37 38 #define DEBUG_TYPE "riscv-lower" 39 40 STATISTIC(NumTailCalls, "Number of tail calls"); 41 42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 43 const RISCVSubtarget &STI) 44 : TargetLowering(TM), Subtarget(STI) { 45 46 if (Subtarget.isRV32E()) 47 report_fatal_error("Codegen not yet implemented for RV32E"); 48 49 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 50 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 51 52 if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64) 53 
report_fatal_error("Don't know how to lower this ABI"); 54 55 MVT XLenVT = Subtarget.getXLenVT(); 56 57 // Set up the register classes. 58 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 59 60 if (Subtarget.hasStdExtF()) 61 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 62 if (Subtarget.hasStdExtD()) 63 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 64 65 // Compute derived properties from the register classes. 66 computeRegisterProperties(STI.getRegisterInfo()); 67 68 setStackPointerRegisterToSaveRestore(RISCV::X2); 69 70 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 71 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 72 73 // TODO: add all necessary setOperationAction calls. 74 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 75 76 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 77 setOperationAction(ISD::BR_CC, XLenVT, Expand); 78 setOperationAction(ISD::SELECT, XLenVT, Custom); 79 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 80 81 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 82 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 83 84 setOperationAction(ISD::VASTART, MVT::Other, Custom); 85 setOperationAction(ISD::VAARG, MVT::Other, Expand); 86 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 87 setOperationAction(ISD::VAEND, MVT::Other, Expand); 88 89 for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) 90 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 91 92 if (Subtarget.is64Bit()) { 93 setOperationAction(ISD::SHL, MVT::i32, Custom); 94 setOperationAction(ISD::SRA, MVT::i32, Custom); 95 setOperationAction(ISD::SRL, MVT::i32, Custom); 96 } 97 98 if (!Subtarget.hasStdExtM()) { 99 setOperationAction(ISD::MUL, XLenVT, Expand); 100 setOperationAction(ISD::MULHS, XLenVT, Expand); 101 setOperationAction(ISD::MULHU, XLenVT, Expand); 102 setOperationAction(ISD::SDIV, XLenVT, Expand); 103 setOperationAction(ISD::UDIV, XLenVT, Expand); 104 setOperationAction(ISD::SREM, XLenVT, Expand); 105 
setOperationAction(ISD::UREM, XLenVT, Expand); 106 } 107 108 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 109 setOperationAction(ISD::SDIV, MVT::i32, Custom); 110 setOperationAction(ISD::UDIV, MVT::i32, Custom); 111 setOperationAction(ISD::UREM, MVT::i32, Custom); 112 } 113 114 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 115 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 116 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 117 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 118 119 setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); 120 setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); 121 setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); 122 123 setOperationAction(ISD::ROTL, XLenVT, Expand); 124 setOperationAction(ISD::ROTR, XLenVT, Expand); 125 setOperationAction(ISD::BSWAP, XLenVT, Expand); 126 setOperationAction(ISD::CTTZ, XLenVT, Expand); 127 setOperationAction(ISD::CTLZ, XLenVT, Expand); 128 setOperationAction(ISD::CTPOP, XLenVT, Expand); 129 130 ISD::CondCode FPCCToExtend[] = { 131 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, 132 ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, 133 ISD::SETGT, ISD::SETGE, ISD::SETNE}; 134 135 ISD::NodeType FPOpToExtend[] = { 136 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; 137 138 if (Subtarget.hasStdExtF()) { 139 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 140 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 141 for (auto CC : FPCCToExtend) 142 setCondCodeAction(CC, MVT::f32, Expand); 143 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 144 setOperationAction(ISD::SELECT, MVT::f32, Custom); 145 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 146 for (auto Op : FPOpToExtend) 147 setOperationAction(Op, MVT::f32, Expand); 148 } 149 150 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 151 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 152 153 if (Subtarget.hasStdExtD()) { 154 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 155 
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 156 for (auto CC : FPCCToExtend) 157 setCondCodeAction(CC, MVT::f64, Expand); 158 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 159 setOperationAction(ISD::SELECT, MVT::f64, Custom); 160 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 161 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 162 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 163 for (auto Op : FPOpToExtend) 164 setOperationAction(Op, MVT::f64, Expand); 165 } 166 167 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 168 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 169 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 170 171 if (Subtarget.hasStdExtA()) { 172 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 173 setMinCmpXchgSizeInBits(32); 174 } else { 175 setMaxAtomicSizeInBitsSupported(0); 176 } 177 178 setBooleanContents(ZeroOrOneBooleanContent); 179 180 // Function alignments (log2). 181 unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; 182 setMinFunctionAlignment(FunctionAlignment); 183 setPrefFunctionAlignment(FunctionAlignment); 184 185 // Effectively disable jump table generation. 
186 setMinimumJumpTableEntries(INT_MAX); 187 } 188 189 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 190 EVT VT) const { 191 if (!VT.isVector()) 192 return getPointerTy(DL); 193 return VT.changeVectorElementTypeToInteger(); 194 } 195 196 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 197 const CallInst &I, 198 MachineFunction &MF, 199 unsigned Intrinsic) const { 200 switch (Intrinsic) { 201 default: 202 return false; 203 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 204 case Intrinsic::riscv_masked_atomicrmw_add_i32: 205 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 206 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 207 case Intrinsic::riscv_masked_atomicrmw_max_i32: 208 case Intrinsic::riscv_masked_atomicrmw_min_i32: 209 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 210 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 211 case Intrinsic::riscv_masked_cmpxchg_i32: 212 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 213 Info.opc = ISD::INTRINSIC_W_CHAIN; 214 Info.memVT = MVT::getVT(PtrTy->getElementType()); 215 Info.ptrVal = I.getArgOperand(0); 216 Info.offset = 0; 217 Info.align = 4; 218 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 219 MachineMemOperand::MOVolatile; 220 return true; 221 } 222 } 223 224 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 225 const AddrMode &AM, Type *Ty, 226 unsigned AS, 227 Instruction *I) const { 228 // No global is ever allowed as a base. 229 if (AM.BaseGV) 230 return false; 231 232 // Require a 12-bit signed offset. 233 if (!isInt<12>(AM.BaseOffs)) 234 return false; 235 236 switch (AM.Scale) { 237 case 0: // "r+i" or just "i", depending on HasBaseReg. 238 break; 239 case 1: 240 if (!AM.HasBaseReg) // allow "r+i". 241 break; 242 return false; // disallow "r+r" or "r+r+i". 
243 default: 244 return false; 245 } 246 247 return true; 248 } 249 250 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 251 return isInt<12>(Imm); 252 } 253 254 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 255 return isInt<12>(Imm); 256 } 257 258 // On RV32, 64-bit integers are split into their high and low parts and held 259 // in two different registers, so the trunc is free since the low register can 260 // just be used. 261 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 262 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 263 return false; 264 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 265 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 266 return (SrcBits == 64 && DestBits == 32); 267 } 268 269 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 270 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 271 !SrcVT.isInteger() || !DstVT.isInteger()) 272 return false; 273 unsigned SrcBits = SrcVT.getSizeInBits(); 274 unsigned DestBits = DstVT.getSizeInBits(); 275 return (SrcBits == 64 && DestBits == 32); 276 } 277 278 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 279 // Zexts are free if they can be combined with a load. 
280 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 281 EVT MemVT = LD->getMemoryVT(); 282 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 283 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 284 (LD->getExtensionType() == ISD::NON_EXTLOAD || 285 LD->getExtensionType() == ISD::ZEXTLOAD)) 286 return true; 287 } 288 289 return TargetLowering::isZExtFree(Val, VT2); 290 } 291 292 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 293 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 294 } 295 296 // Changes the condition code and swaps operands if necessary, so the SetCC 297 // operation matches one of the comparisons supported directly in the RISC-V 298 // ISA. 299 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 300 switch (CC) { 301 default: 302 break; 303 case ISD::SETGT: 304 case ISD::SETLE: 305 case ISD::SETUGT: 306 case ISD::SETULE: 307 CC = ISD::getSetCCSwappedOperands(CC); 308 std::swap(LHS, RHS); 309 break; 310 } 311 } 312 313 // Return the RISC-V branch opcode that matches the given DAG integer 314 // condition code. The CondCode must be one of those supported by the RISC-V 315 // ISA (see normaliseSetCC). 
316 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 317 switch (CC) { 318 default: 319 llvm_unreachable("Unsupported CondCode"); 320 case ISD::SETEQ: 321 return RISCV::BEQ; 322 case ISD::SETNE: 323 return RISCV::BNE; 324 case ISD::SETLT: 325 return RISCV::BLT; 326 case ISD::SETGE: 327 return RISCV::BGE; 328 case ISD::SETULT: 329 return RISCV::BLTU; 330 case ISD::SETUGE: 331 return RISCV::BGEU; 332 } 333 } 334 335 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 336 SelectionDAG &DAG) const { 337 switch (Op.getOpcode()) { 338 default: 339 report_fatal_error("unimplemented operand"); 340 case ISD::GlobalAddress: 341 return lowerGlobalAddress(Op, DAG); 342 case ISD::BlockAddress: 343 return lowerBlockAddress(Op, DAG); 344 case ISD::ConstantPool: 345 return lowerConstantPool(Op, DAG); 346 case ISD::SELECT: 347 return lowerSELECT(Op, DAG); 348 case ISD::VASTART: 349 return lowerVASTART(Op, DAG); 350 case ISD::FRAMEADDR: 351 return lowerFRAMEADDR(Op, DAG); 352 case ISD::RETURNADDR: 353 return lowerRETURNADDR(Op, DAG); 354 case ISD::BITCAST: { 355 assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && 356 "Unexpected custom legalisation"); 357 SDLoc DL(Op); 358 SDValue Op0 = Op.getOperand(0); 359 if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) 360 return SDValue(); 361 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 362 SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 363 return FPConv; 364 } 365 } 366 } 367 368 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 369 SelectionDAG &DAG) const { 370 SDLoc DL(Op); 371 EVT Ty = Op.getValueType(); 372 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 373 const GlobalValue *GV = N->getGlobal(); 374 int64_t Offset = N->getOffset(); 375 MVT XLenVT = Subtarget.getXLenVT(); 376 377 if (isPositionIndependent()) 378 report_fatal_error("Unable to lowerGlobalAddress"); 379 // In order to maximise the opportunity for common 
subexpression elimination, 380 // emit a separate ADD node for the global address offset instead of folding 381 // it in the global address node. Later peephole optimisations may choose to 382 // fold it back in when profitable. 383 SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); 384 SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); 385 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); 386 SDValue MNLo = 387 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); 388 if (Offset != 0) 389 return DAG.getNode(ISD::ADD, DL, Ty, MNLo, 390 DAG.getConstant(Offset, DL, XLenVT)); 391 return MNLo; 392 } 393 394 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 395 SelectionDAG &DAG) const { 396 SDLoc DL(Op); 397 EVT Ty = Op.getValueType(); 398 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 399 const BlockAddress *BA = N->getBlockAddress(); 400 int64_t Offset = N->getOffset(); 401 402 if (isPositionIndependent()) 403 report_fatal_error("Unable to lowerBlockAddress"); 404 405 SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); 406 SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); 407 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); 408 SDValue MNLo = 409 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); 410 return MNLo; 411 } 412 413 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 414 SelectionDAG &DAG) const { 415 SDLoc DL(Op); 416 EVT Ty = Op.getValueType(); 417 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 418 const Constant *CPA = N->getConstVal(); 419 int64_t Offset = N->getOffset(); 420 unsigned Alignment = N->getAlignment(); 421 422 if (!isPositionIndependent()) { 423 SDValue CPAHi = 424 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); 425 SDValue CPALo = 426 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); 427 SDValue MNHi = 
SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); 428 SDValue MNLo = 429 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); 430 return MNLo; 431 } else { 432 report_fatal_error("Unable to lowerConstantPool"); 433 } 434 } 435 436 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 437 SDValue CondV = Op.getOperand(0); 438 SDValue TrueV = Op.getOperand(1); 439 SDValue FalseV = Op.getOperand(2); 440 SDLoc DL(Op); 441 MVT XLenVT = Subtarget.getXLenVT(); 442 443 // If the result type is XLenVT and CondV is the output of a SETCC node 444 // which also operated on XLenVT inputs, then merge the SETCC node into the 445 // lowered RISCVISD::SELECT_CC to take advantage of the integer 446 // compare+branch instructions. i.e.: 447 // (select (setcc lhs, rhs, cc), truev, falsev) 448 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 449 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 450 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 451 SDValue LHS = CondV.getOperand(0); 452 SDValue RHS = CondV.getOperand(1); 453 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 454 ISD::CondCode CCVal = CC->get(); 455 456 normaliseSetCC(LHS, RHS, CCVal); 457 458 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 459 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 460 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 461 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 462 } 463 464 // Otherwise: 465 // (select condv, truev, falsev) 466 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 467 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 468 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 469 470 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 471 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 472 473 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 474 } 475 476 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) 
const { 477 MachineFunction &MF = DAG.getMachineFunction(); 478 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 479 480 SDLoc DL(Op); 481 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 482 getPointerTy(MF.getDataLayout())); 483 484 // vastart just stores the address of the VarArgsFrameIndex slot into the 485 // memory location argument. 486 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 487 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 488 MachinePointerInfo(SV)); 489 } 490 491 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 492 SelectionDAG &DAG) const { 493 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 494 MachineFunction &MF = DAG.getMachineFunction(); 495 MachineFrameInfo &MFI = MF.getFrameInfo(); 496 MFI.setFrameAddressIsTaken(true); 497 unsigned FrameReg = RI.getFrameRegister(MF); 498 int XLenInBytes = Subtarget.getXLen() / 8; 499 500 EVT VT = Op.getValueType(); 501 SDLoc DL(Op); 502 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 503 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 504 while (Depth--) { 505 int Offset = -(XLenInBytes * 2); 506 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 507 DAG.getIntPtrConstant(Offset, DL)); 508 FrameAddr = 509 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 510 } 511 return FrameAddr; 512 } 513 514 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 515 SelectionDAG &DAG) const { 516 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 517 MachineFunction &MF = DAG.getMachineFunction(); 518 MachineFrameInfo &MFI = MF.getFrameInfo(); 519 MFI.setReturnAddressIsTaken(true); 520 MVT XLenVT = Subtarget.getXLenVT(); 521 int XLenInBytes = Subtarget.getXLen() / 8; 522 523 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 524 return SDValue(); 525 526 EVT VT = Op.getValueType(); 527 SDLoc DL(Op); 528 unsigned Depth = 
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 529 if (Depth) { 530 int Off = -XLenInBytes; 531 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 532 SDValue Offset = DAG.getConstant(Off, DL, VT); 533 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 534 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 535 MachinePointerInfo()); 536 } 537 538 // Return the value of the return address register, marking it an implicit 539 // live-in. 540 unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 541 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 542 } 543 544 // Returns the opcode of the target-specific SDNode that implements the 32-bit 545 // form of the given Opcode. 546 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 547 switch (Opcode) { 548 default: 549 llvm_unreachable("Unexpected opcode"); 550 case ISD::SHL: 551 return RISCVISD::SLLW; 552 case ISD::SRA: 553 return RISCVISD::SRAW; 554 case ISD::SRL: 555 return RISCVISD::SRLW; 556 case ISD::SDIV: 557 return RISCVISD::DIVW; 558 case ISD::UDIV: 559 return RISCVISD::DIVUW; 560 case ISD::UREM: 561 return RISCVISD::REMUW; 562 } 563 } 564 565 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 566 // Because i32 isn't a legal type for RV64, these operations would otherwise 567 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 568 // later one because the fact the operation was originally of type i32 is 569 // lost. 570 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { 571 SDLoc DL(N); 572 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 573 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 574 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 575 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 576 // ReplaceNodeResults requires we maintain the same type for the return value. 
577 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 578 } 579 580 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 581 SmallVectorImpl<SDValue> &Results, 582 SelectionDAG &DAG) const { 583 SDLoc DL(N); 584 switch (N->getOpcode()) { 585 default: 586 llvm_unreachable("Don't know how to custom type legalize this operation!"); 587 case ISD::SHL: 588 case ISD::SRA: 589 case ISD::SRL: 590 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 591 "Unexpected custom legalisation"); 592 if (N->getOperand(1).getOpcode() == ISD::Constant) 593 return; 594 Results.push_back(customLegalizeToWOp(N, DAG)); 595 break; 596 case ISD::SDIV: 597 case ISD::UDIV: 598 case ISD::UREM: 599 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 600 Subtarget.hasStdExtM() && "Unexpected custom legalisation"); 601 if (N->getOperand(0).getOpcode() == ISD::Constant || 602 N->getOperand(1).getOpcode() == ISD::Constant) 603 return; 604 Results.push_back(customLegalizeToWOp(N, DAG)); 605 break; 606 case ISD::BITCAST: { 607 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 608 Subtarget.hasStdExtF() && "Unexpected custom legalisation"); 609 SDLoc DL(N); 610 SDValue Op0 = N->getOperand(0); 611 if (Op0.getValueType() != MVT::f32) 612 return; 613 SDValue FPConv = 614 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 615 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 616 break; 617 } 618 } 619 } 620 621 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 622 DAGCombinerInfo &DCI) const { 623 SelectionDAG &DAG = DCI.DAG; 624 625 switch (N->getOpcode()) { 626 default: 627 break; 628 case RISCVISD::SplitF64: { 629 SDValue Op0 = N->getOperand(0); 630 // If the input to SplitF64 is just BuildPairF64 then the operation is 631 // redundant. Instead, use BuildPairF64's operands directly. 
632 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 633 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 634 635 SDLoc DL(N); 636 // This is a target-specific version of a DAGCombine performed in 637 // DAGCombiner::visitBITCAST. It performs the equivalent of: 638 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 639 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 640 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 641 !Op0.getNode()->hasOneUse()) 642 break; 643 SDValue NewSplitF64 = 644 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 645 Op0.getOperand(0)); 646 SDValue Lo = NewSplitF64.getValue(0); 647 SDValue Hi = NewSplitF64.getValue(1); 648 APInt SignBit = APInt::getSignMask(32); 649 if (Op0.getOpcode() == ISD::FNEG) { 650 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 651 DAG.getConstant(SignBit, DL, MVT::i32)); 652 return DCI.CombineTo(N, Lo, NewHi); 653 } 654 assert(Op0.getOpcode() == ISD::FABS); 655 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 656 DAG.getConstant(~SignBit, DL, MVT::i32)); 657 return DCI.CombineTo(N, Lo, NewHi); 658 } 659 case RISCVISD::SLLW: 660 case RISCVISD::SRAW: 661 case RISCVISD::SRLW: { 662 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 663 SDValue LHS = N->getOperand(0); 664 SDValue RHS = N->getOperand(1); 665 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 666 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 667 if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || 668 (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) 669 return SDValue(); 670 break; 671 } 672 case RISCVISD::FMV_X_ANYEXTW_RV64: { 673 SDLoc DL(N); 674 SDValue Op0 = N->getOperand(0); 675 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 676 // conversion is unnecessary and can be replaced with an ANY_EXTEND 677 // of the FMV_W_X_RV64 operand. 
678 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 679 SDValue AExtOp = 680 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); 681 return DCI.CombineTo(N, AExtOp); 682 } 683 684 // This is a target-specific version of a DAGCombine performed in 685 // DAGCombiner::visitBITCAST. It performs the equivalent of: 686 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 687 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 688 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 689 !Op0.getNode()->hasOneUse()) 690 break; 691 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 692 Op0.getOperand(0)); 693 APInt SignBit = APInt::getSignMask(32).sext(64); 694 if (Op0.getOpcode() == ISD::FNEG) { 695 return DCI.CombineTo(N, 696 DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 697 DAG.getConstant(SignBit, DL, MVT::i64))); 698 } 699 assert(Op0.getOpcode() == ISD::FABS); 700 return DCI.CombineTo(N, 701 DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 702 DAG.getConstant(~SignBit, DL, MVT::i64))); 703 } 704 } 705 706 return SDValue(); 707 } 708 709 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 710 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 711 unsigned Depth) const { 712 switch (Op.getOpcode()) { 713 default: 714 break; 715 case RISCVISD::SLLW: 716 case RISCVISD::SRAW: 717 case RISCVISD::SRLW: 718 case RISCVISD::DIVW: 719 case RISCVISD::DIVUW: 720 case RISCVISD::REMUW: 721 // TODO: As the result is sign-extended, this is conservatively correct. A 722 // more precise answer could be calculated for SRAW depending on known 723 // bits in the shift amount. 
724 return 33; 725 } 726 727 return 1; 728 } 729 730 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 731 MachineBasicBlock *BB) { 732 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 733 734 MachineFunction &MF = *BB->getParent(); 735 DebugLoc DL = MI.getDebugLoc(); 736 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 737 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 738 unsigned LoReg = MI.getOperand(0).getReg(); 739 unsigned HiReg = MI.getOperand(1).getReg(); 740 unsigned SrcReg = MI.getOperand(2).getReg(); 741 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 742 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 743 744 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 745 RI); 746 MachineMemOperand *MMO = 747 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 748 MachineMemOperand::MOLoad, 8, 8); 749 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 750 .addFrameIndex(FI) 751 .addImm(0) 752 .addMemOperand(MMO); 753 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 754 .addFrameIndex(FI) 755 .addImm(4) 756 .addMemOperand(MMO); 757 MI.eraseFromParent(); // The pseudo instruction is gone now. 
758 return BB; 759 } 760 761 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 762 MachineBasicBlock *BB) { 763 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 764 "Unexpected instruction"); 765 766 MachineFunction &MF = *BB->getParent(); 767 DebugLoc DL = MI.getDebugLoc(); 768 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 769 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 770 unsigned DstReg = MI.getOperand(0).getReg(); 771 unsigned LoReg = MI.getOperand(1).getReg(); 772 unsigned HiReg = MI.getOperand(2).getReg(); 773 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 774 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 775 776 MachineMemOperand *MMO = 777 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 778 MachineMemOperand::MOStore, 8, 8); 779 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 780 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 781 .addFrameIndex(FI) 782 .addImm(0) 783 .addMemOperand(MMO); 784 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 785 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 786 .addFrameIndex(FI) 787 .addImm(4) 788 .addMemOperand(MMO); 789 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 790 MI.eraseFromParent(); // The pseudo instruction is gone now. 791 return BB; 792 } 793 794 static bool isSelectPseudo(MachineInstr &MI) { 795 switch (MI.getOpcode()) { 796 default: 797 return false; 798 case RISCV::Select_GPR_Using_CC_GPR: 799 case RISCV::Select_FPR32_Using_CC_GPR: 800 case RISCV::Select_FPR64_Using_CC_GPR: 801 return true; 802 } 803 } 804 805 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 806 MachineBasicBlock *BB) { 807 // To "insert" Select_* instructions, we actually have to insert the triangle 808 // control-flow pattern. 
The incoming instructions know the destination vreg 809 // to set, the condition code register to branch on, the true/false values to 810 // select between, and the condcode to use to select the appropriate branch. 811 // 812 // We produce the following control flow: 813 // HeadMBB 814 // | \ 815 // | IfFalseMBB 816 // | / 817 // TailMBB 818 // 819 // When we find a sequence of selects we attempt to optimize their emission 820 // by sharing the control flow. Currently we only handle cases where we have 821 // multiple selects with the exact same condition (same LHS, RHS and CC). 822 // The selects may be interleaved with other instructions if the other 823 // instructions meet some requirements we deem safe: 824 // - They are debug instructions. Otherwise, 825 // - They do not have side-effects, do not access memory and their inputs do 826 // not depend on the results of the select pseudo-instructions. 827 // The TrueV/FalseV operands of the selects cannot depend on the result of 828 // previous selects in the sequence. 829 // These conditions could be further relaxed. See the X86 target for a 830 // related approach and more information. 
831 unsigned LHS = MI.getOperand(1).getReg(); 832 unsigned RHS = MI.getOperand(2).getReg(); 833 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 834 835 SmallVector<MachineInstr *, 4> SelectDebugValues; 836 SmallSet<unsigned, 4> SelectDests; 837 SelectDests.insert(MI.getOperand(0).getReg()); 838 839 MachineInstr *LastSelectPseudo = &MI; 840 841 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 842 SequenceMBBI != E; ++SequenceMBBI) { 843 if (SequenceMBBI->isDebugInstr()) 844 continue; 845 else if (isSelectPseudo(*SequenceMBBI)) { 846 if (SequenceMBBI->getOperand(1).getReg() != LHS || 847 SequenceMBBI->getOperand(2).getReg() != RHS || 848 SequenceMBBI->getOperand(3).getImm() != CC || 849 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 850 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 851 break; 852 LastSelectPseudo = &*SequenceMBBI; 853 SequenceMBBI->collectDebugValues(SelectDebugValues); 854 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 855 } else { 856 if (SequenceMBBI->hasUnmodeledSideEffects() || 857 SequenceMBBI->mayLoadOrStore()) 858 break; 859 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 860 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 861 })) 862 break; 863 } 864 } 865 866 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 867 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 868 DebugLoc DL = MI.getDebugLoc(); 869 MachineFunction::iterator I = ++BB->getIterator(); 870 871 MachineBasicBlock *HeadMBB = BB; 872 MachineFunction *F = BB->getParent(); 873 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 874 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 875 876 F->insert(I, IfFalseMBB); 877 F->insert(I, TailMBB); 878 879 // Transfer debug instructions associated with the selects to TailMBB. 
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  return TailMBB;
}

// Dispatches a pseudo-instruction that needs custom insertion to the helper
// that expands it, keyed on MI's opcode:
//   * Select_{GPR,FPR32,FPR64}_Using_CC_GPR -> emitSelectPseudo
//   * BuildPairF64Pseudo                    -> emitBuildPairF64Pseudo
//   * SplitF64Pseudo                        -> emitSplitF64Pseudo
// Returns whatever basic block the chosen helper returns. Any other opcode is
// treated as a programming error (llvm_unreachable).
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  // All three select pseudos share one expansion.
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
968 969 static const MCPhysReg ArgGPRs[] = { 970 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 971 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 972 }; 973 974 // Pass a 2*XLEN argument that has been split into two XLEN values through 975 // registers or the stack as necessary. 976 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 977 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 978 MVT ValVT2, MVT LocVT2, 979 ISD::ArgFlagsTy ArgFlags2) { 980 unsigned XLenInBytes = XLen / 8; 981 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 982 // At least one half can be passed via register. 983 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 984 VA1.getLocVT(), CCValAssign::Full)); 985 } else { 986 // Both halves must be passed on the stack, with proper alignment. 987 unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); 988 State.addLoc( 989 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 990 State.AllocateStack(XLenInBytes, StackAlign), 991 VA1.getLocVT(), CCValAssign::Full)); 992 State.addLoc(CCValAssign::getMem( 993 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 994 CCValAssign::Full)); 995 return false; 996 } 997 998 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 999 // The second half can also be passed via register. 1000 State.addLoc( 1001 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 1002 } else { 1003 // The second half is passed via the stack, without additional alignment. 1004 State.addLoc(CCValAssign::getMem( 1005 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 1006 CCValAssign::Full)); 1007 } 1008 1009 return false; 1010 } 1011 1012 // Implements the RISC-V calling convention. Returns true upon failure. 
// CC_RISCV assigns a location to a single value (one argument or return-value
// part) and records it in State.
//   DL       - data layout; supplies XLen and the alloc size of OrigTy.
//   ValNo    - index of the value being assigned.
//   ValVT    - type of the value.
//   LocVT / LocInfo - defaults supplied by the caller; overridden below for
//              floating-point values and for split/indirect values.
//   ArgFlags - split/alignment flags of the value.
//   IsFixed  - false for variadic arguments.
//   IsRet    - true when a return value is being analysed.
//   OrigTy   - original IR type; only dereferenced when !IsFixed.
// Returns true upon failure and false once a location has been recorded (or
// the value has been queued in PendingLocs).
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // Floating-point values that fit in XLEN are carried in a GPR-sized
  // location and marked as bit-converted.
  if (ValVT == MVT::f32) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI.
  if (XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve the second GPR, or a 4-byte stack slot when none is left. Only
    // the first register is recorded as a location; the second half gets no
    // CCValAssign of its own.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    // Wait for the last part before deciding how the whole value is passed.
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // Every pending part is given the same register or stack offset.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

// Runs CC_RISCV over every entry of Ins, recording the resulting locations in
// CCInfo. With IsRet set, the type handed to CC_RISCV is the return type of
// MF's function; otherwise it is the IR parameter type when Ins[i] is an
// original argument, and null if not. A value that CC_RISCV rejects is
// treated as unreachable.
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    // Incoming values are always analysed as fixed, so CC_RISCV never
    // dereferences ArgTy on this path.
    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

// Runs CC_RISCV over every entry of Outs, recording the resulting locations
// in CCInfo. The original IR type of each value is taken from CLI's argument
// list when CLI is non-null and is null otherwise. A value that CC_RISCV
// rejects is treated as unreachable.
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
// Full values are returned unchanged. BCvt values are bitcast to the value
// type, except for an f32 held in an i64 location, which is converted with
// RISCVISD::FMV_W_X_RV64.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
1210 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 1211 const CCValAssign &VA, const SDLoc &DL) { 1212 MachineFunction &MF = DAG.getMachineFunction(); 1213 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1214 EVT LocVT = VA.getLocVT(); 1215 SDValue Val; 1216 1217 unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1218 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1219 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1220 1221 if (VA.getLocInfo() == CCValAssign::Indirect) 1222 return Val; 1223 1224 return convertLocVTToValVT(DAG, Val, VA, DL); 1225 } 1226 1227 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 1228 const CCValAssign &VA, const SDLoc &DL) { 1229 EVT LocVT = VA.getLocVT(); 1230 1231 switch (VA.getLocInfo()) { 1232 default: 1233 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1234 case CCValAssign::Full: 1235 break; 1236 case CCValAssign::BCvt: 1237 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 1238 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 1239 break; 1240 } 1241 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 1242 break; 1243 } 1244 return Val; 1245 } 1246 1247 // The caller is responsible for loading the full value if the argument is 1248 // passed with CCValAssign::Indirect. 
1249 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 1250 const CCValAssign &VA, const SDLoc &DL) { 1251 MachineFunction &MF = DAG.getMachineFunction(); 1252 MachineFrameInfo &MFI = MF.getFrameInfo(); 1253 EVT LocVT = VA.getLocVT(); 1254 EVT ValVT = VA.getValVT(); 1255 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 1256 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1257 VA.getLocMemOffset(), /*Immutable=*/true); 1258 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1259 SDValue Val; 1260 1261 ISD::LoadExtType ExtType; 1262 switch (VA.getLocInfo()) { 1263 default: 1264 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1265 case CCValAssign::Full: 1266 case CCValAssign::Indirect: 1267 case CCValAssign::BCvt: 1268 ExtType = ISD::NON_EXTLOAD; 1269 break; 1270 } 1271 Val = DAG.getExtLoad( 1272 ExtType, DL, LocVT, Chain, FIN, 1273 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 1274 return Val; 1275 } 1276 1277 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 1278 const CCValAssign &VA, const SDLoc &DL) { 1279 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 1280 "Unexpected VA"); 1281 MachineFunction &MF = DAG.getMachineFunction(); 1282 MachineFrameInfo &MFI = MF.getFrameInfo(); 1283 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1284 1285 if (VA.isMemLoc()) { 1286 // f64 is passed on the stack. 
1287 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 1288 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1289 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 1290 MachinePointerInfo::getFixedStack(MF, FI)); 1291 } 1292 1293 assert(VA.isRegLoc() && "Expected register VA assignment"); 1294 1295 unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1296 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 1297 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 1298 SDValue Hi; 1299 if (VA.getLocReg() == RISCV::X17) { 1300 // Second half of f64 is passed on the stack. 1301 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 1302 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1303 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 1304 MachinePointerInfo::getFixedStack(MF, FI)); 1305 } else { 1306 // Second half of f64 is passed in another GPR. 1307 unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1308 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 1309 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 1310 } 1311 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1312 } 1313 1314 // Transform physical registers into virtual registers. 
// LowerFormalArguments rejects calling conventions other than C and Fast,
// validates any "interrupt" function attribute, assigns a location to every
// incoming argument via analyzeInputArgs, and pushes the unpacked values onto
// InVals. For variadic functions it also stores the argument GPRs that were
// not allocated to fixed arguments into a save area, recording that area's
// frame index and size in RISCVMachineFunctionInfo. Returns the chain, which
// is a TokenFactor over those stores when any were emitted.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  // Interrupt handlers take no arguments and must name one of the supported
  // interrupt kinds.
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      // Consume the remaining parts of the same original argument; the inner
      // ++i advances the outer loop past them.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    // Index of the first argument GPR not used by a fixed argument.
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR value attached to the store's memory operand.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
/// CCInfo and ArgLocs must already hold the result of analysing the call's
/// outgoing arguments; CLI describes the call and MF is the calling function.
/// Returns false as soon as any of the checks below fails.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto IsVarArg = CLI.IsVarArg;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt variadic calls (CLI.IsVarArg).
  if (IsVarArg)
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  // The masks only need comparing when the calling conventions differ.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
// The steps are: analyse the outgoing arguments; decide whether a requested
// tail call is really possible (CLI.IsTailCall is updated in place, and a
// musttail call site that cannot be tail-called is a fatal error); copy byval
// arguments into local stack objects; place each argument in its register or
// stack slot; emit the CALL or TAIL node; and, for a normal call, copy the
// returned values out of their registers into InVals. Returns the resulting
// chain (for a tail call, the TAIL node itself).
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // i indexes ArgLocs/Outs/OutVals; j indexes ByValArgs, which holds one
  // entry per byval argument in argument order.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // X17 is the last entry of ArgGPRs, so no GPR is left for the high half.
      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      // The inner ++i advances the outer loop past the parts stored here.
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // A tail call ends here: no callseq_end is emitted and no results are
  // copied out.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // An f64 with an i32 location comes back in the first two argument GPRs
    // and is reassembled with BuildPairF64.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

// Returns true when CC_RISCV accepts every value in Outs as a fixed return
// value, and false as soon as it reports failure for one of them. The
// locations computed along the way are discarded.
bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
1811 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 1812 *DAG.getContext()); 1813 1814 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 1815 nullptr); 1816 1817 SDValue Glue; 1818 SmallVector<SDValue, 4> RetOps(1, Chain); 1819 1820 // Copy the result values into the output registers. 1821 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 1822 SDValue Val = OutVals[i]; 1823 CCValAssign &VA = RVLocs[i]; 1824 assert(VA.isRegLoc() && "Can only return in registers!"); 1825 1826 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 1827 // Handle returning f64 on RV32D with a soft float ABI. 1828 assert(VA.isRegLoc() && "Expected return via registers"); 1829 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 1830 DAG.getVTList(MVT::i32, MVT::i32), Val); 1831 SDValue Lo = SplitF64.getValue(0); 1832 SDValue Hi = SplitF64.getValue(1); 1833 unsigned RegLo = VA.getLocReg(); 1834 unsigned RegHi = RegLo + 1; 1835 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 1836 Glue = Chain.getValue(1); 1837 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 1838 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 1839 Glue = Chain.getValue(1); 1840 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 1841 } else { 1842 // Handle a 'normal' return. 1843 Val = convertValVTToLocVT(DAG, Val, VA, DL); 1844 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 1845 1846 // Guarantee that all emitted copies are stuck together. 1847 Glue = Chain.getValue(1); 1848 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 1849 } 1850 } 1851 1852 RetOps[0] = Chain; // Update chain. 1853 1854 // Add the glue node if we have it. 1855 if (Glue.getNode()) { 1856 RetOps.push_back(Glue); 1857 } 1858 1859 // Interrupt service routines use different return instructions. 
1860 const Function &Func = DAG.getMachineFunction().getFunction(); 1861 if (Func.hasFnAttribute("interrupt")) { 1862 if (!Func.getReturnType()->isVoidTy()) 1863 report_fatal_error( 1864 "Functions with the interrupt attribute must have void return type!"); 1865 1866 MachineFunction &MF = DAG.getMachineFunction(); 1867 StringRef Kind = 1868 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 1869 1870 unsigned RetOpc; 1871 if (Kind == "user") 1872 RetOpc = RISCVISD::URET_FLAG; 1873 else if (Kind == "supervisor") 1874 RetOpc = RISCVISD::SRET_FLAG; 1875 else 1876 RetOpc = RISCVISD::MRET_FLAG; 1877 1878 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 1879 } 1880 1881 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 1882 } 1883 1884 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 1885 switch ((RISCVISD::NodeType)Opcode) { 1886 case RISCVISD::FIRST_NUMBER: 1887 break; 1888 case RISCVISD::RET_FLAG: 1889 return "RISCVISD::RET_FLAG"; 1890 case RISCVISD::URET_FLAG: 1891 return "RISCVISD::URET_FLAG"; 1892 case RISCVISD::SRET_FLAG: 1893 return "RISCVISD::SRET_FLAG"; 1894 case RISCVISD::MRET_FLAG: 1895 return "RISCVISD::MRET_FLAG"; 1896 case RISCVISD::CALL: 1897 return "RISCVISD::CALL"; 1898 case RISCVISD::SELECT_CC: 1899 return "RISCVISD::SELECT_CC"; 1900 case RISCVISD::BuildPairF64: 1901 return "RISCVISD::BuildPairF64"; 1902 case RISCVISD::SplitF64: 1903 return "RISCVISD::SplitF64"; 1904 case RISCVISD::TAIL: 1905 return "RISCVISD::TAIL"; 1906 case RISCVISD::SLLW: 1907 return "RISCVISD::SLLW"; 1908 case RISCVISD::SRAW: 1909 return "RISCVISD::SRAW"; 1910 case RISCVISD::SRLW: 1911 return "RISCVISD::SRLW"; 1912 case RISCVISD::DIVW: 1913 return "RISCVISD::DIVW"; 1914 case RISCVISD::DIVUW: 1915 return "RISCVISD::DIVUW"; 1916 case RISCVISD::REMUW: 1917 return "RISCVISD::REMUW"; 1918 case RISCVISD::FMV_W_X_RV64: 1919 return "RISCVISD::FMV_W_X_RV64"; 1920 case RISCVISD::FMV_X_ANYEXTW_RV64: 1921 return 
"RISCVISD::FMV_X_ANYEXTW_RV64"; 1922 } 1923 return nullptr; 1924 } 1925 1926 std::pair<unsigned, const TargetRegisterClass *> 1927 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 1928 StringRef Constraint, 1929 MVT VT) const { 1930 // First, see if this is a constraint that directly corresponds to a 1931 // RISCV register class. 1932 if (Constraint.size() == 1) { 1933 switch (Constraint[0]) { 1934 case 'r': 1935 return std::make_pair(0U, &RISCV::GPRRegClass); 1936 default: 1937 break; 1938 } 1939 } 1940 1941 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 1942 } 1943 1944 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 1945 Instruction *Inst, 1946 AtomicOrdering Ord) const { 1947 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 1948 return Builder.CreateFence(Ord); 1949 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 1950 return Builder.CreateFence(AtomicOrdering::Release); 1951 return nullptr; 1952 } 1953 1954 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 1955 Instruction *Inst, 1956 AtomicOrdering Ord) const { 1957 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 1958 return Builder.CreateFence(AtomicOrdering::Acquire); 1959 return nullptr; 1960 } 1961 1962 TargetLowering::AtomicExpansionKind 1963 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 1964 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 1965 // point operations can't be used in an lr/sc sequence without breaking the 1966 // forward-progress guarantee. 
1967 if (AI->isFloatingPointOperation()) 1968 return AtomicExpansionKind::CmpXChg; 1969 1970 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 1971 if (Size == 8 || Size == 16) 1972 return AtomicExpansionKind::MaskedIntrinsic; 1973 return AtomicExpansionKind::None; 1974 } 1975 1976 static Intrinsic::ID 1977 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 1978 if (XLen == 32) { 1979 switch (BinOp) { 1980 default: 1981 llvm_unreachable("Unexpected AtomicRMW BinOp"); 1982 case AtomicRMWInst::Xchg: 1983 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 1984 case AtomicRMWInst::Add: 1985 return Intrinsic::riscv_masked_atomicrmw_add_i32; 1986 case AtomicRMWInst::Sub: 1987 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 1988 case AtomicRMWInst::Nand: 1989 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 1990 case AtomicRMWInst::Max: 1991 return Intrinsic::riscv_masked_atomicrmw_max_i32; 1992 case AtomicRMWInst::Min: 1993 return Intrinsic::riscv_masked_atomicrmw_min_i32; 1994 case AtomicRMWInst::UMax: 1995 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 1996 case AtomicRMWInst::UMin: 1997 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 1998 } 1999 } 2000 2001 if (XLen == 64) { 2002 switch (BinOp) { 2003 default: 2004 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2005 case AtomicRMWInst::Xchg: 2006 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 2007 case AtomicRMWInst::Add: 2008 return Intrinsic::riscv_masked_atomicrmw_add_i64; 2009 case AtomicRMWInst::Sub: 2010 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 2011 case AtomicRMWInst::Nand: 2012 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 2013 case AtomicRMWInst::Max: 2014 return Intrinsic::riscv_masked_atomicrmw_max_i64; 2015 case AtomicRMWInst::Min: 2016 return Intrinsic::riscv_masked_atomicrmw_min_i64; 2017 case AtomicRMWInst::UMax: 2018 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 2019 case AtomicRMWInst::UMin: 2020 return 
Intrinsic::riscv_masked_atomicrmw_umin_i64; 2021 } 2022 } 2023 2024 llvm_unreachable("Unexpected XLen\n"); 2025 } 2026 2027 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 2028 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 2029 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 2030 unsigned XLen = Subtarget.getXLen(); 2031 Value *Ordering = 2032 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 2033 Type *Tys[] = {AlignedAddr->getType()}; 2034 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 2035 AI->getModule(), 2036 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 2037 2038 if (XLen == 64) { 2039 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 2040 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2041 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 2042 } 2043 2044 Value *Result; 2045 2046 // Must pass the shift amount needed to sign extend the loaded value prior 2047 // to performing a signed comparison for min/max. ShiftAmt is the number of 2048 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 2049 // is the number of bits to left+right shift the value in order to 2050 // sign-extend. 
2051 if (AI->getOperation() == AtomicRMWInst::Min || 2052 AI->getOperation() == AtomicRMWInst::Max) { 2053 const DataLayout &DL = AI->getModule()->getDataLayout(); 2054 unsigned ValWidth = 2055 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 2056 Value *SextShamt = 2057 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 2058 Result = Builder.CreateCall(LrwOpScwLoop, 2059 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 2060 } else { 2061 Result = 2062 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 2063 } 2064 2065 if (XLen == 64) 2066 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2067 return Result; 2068 } 2069 2070 TargetLowering::AtomicExpansionKind 2071 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 2072 AtomicCmpXchgInst *CI) const { 2073 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 2074 if (Size == 8 || Size == 16) 2075 return AtomicExpansionKind::MaskedIntrinsic; 2076 return AtomicExpansionKind::None; 2077 } 2078 2079 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 2080 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 2081 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 2082 unsigned XLen = Subtarget.getXLen(); 2083 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 2084 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 2085 if (XLen == 64) { 2086 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 2087 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 2088 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2089 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 2090 } 2091 Type *Tys[] = {AlignedAddr->getType()}; 2092 Function *MaskedCmpXchg = 2093 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 2094 Value *Result = Builder.CreateCall( 2095 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 2096 if (XLen == 64) 2097 
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2098 return Result; 2099 } 2100