//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
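  // (Reminder of the action semantics: Legal nodes are selected as-is, Expand
  // nodes are rewritten in terms of other operations during legalisation, and
  // Custom nodes are routed to LowerOperation below.)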
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
      ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
      ISD::SETGT,  ISD::SETGE,  ISD::SETNE};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  }

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments (log2).
  unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 4;
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
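  // RISC-V has zero-extending loads (LBU and LHU, plus LWU on RV64), so a
  // zero-extend of a narrow load can be folded into the load itself.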
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = N->getGlobal();
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerGlobalAddress");
  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
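  // The resulting non-PIC code sequence is thus roughly:
  //   lui  rd, %hi(sym)
  //   addi rd, rd, %lo(sym)
  // followed by a separate ADD of any non-zero offset.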
  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
  SDValue MNLo =
      SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
                       DAG.getConstant(Offset, DL, XLenVT));
  return MNLo;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = N->getBlockAddress();
  int64_t Offset = N->getOffset();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerBlockAddress");

  SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
  SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
  SDValue MNLo =
      SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
  return MNLo;
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
  const Constant *CPA = N->getConstVal();
  int64_t Offset = N->getOffset();
  unsigned Alignment = N->getAlignment();

  if (!isPositionIndependent()) {
    SDValue CPAHi =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
    SDValue CPALo =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
    SDValue MNLo =
        SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
    return MNLo;
  } else {
    report_fatal_error("Unable to lowerConstantPool");
  }
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  unsigned FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    SDValue Op0 = N->getOperand(0);
    if (Op0->getOpcode() != RISCVISD::BuildPairF64)
      break;
    return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
  }
  }

  return SDValue();
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned LoReg = MI.getOperand(0).getReg();
  unsigned HiReg = MI.getOperand(1).getReg();
  unsigned SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned LoReg = MI.getOperand(1).getReg();
  unsigned HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    break;
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }

  // To "insert" a SELECT instruction, we actually have to insert the triangle
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);
  // Move all remaining instructions to TailMBB.
  TailMBB->splice(TailMBB->begin(), HeadMBB,
                  std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned LHS = MI.getOperand(1).getReg();
  unsigned RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(4).getReg())
      .addMBB(HeadMBB)
      .addReg(MI.getOperand(5).getReg())
      .addMBB(IfFalseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return TailMBB;
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or FPRs
//   are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference, the
//   frontend must modify the prototype so a pointer with the sret annotation
//   is passed as the first argument. This is not necessary for large scalar
//   returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.

static const MCPhysReg ArgGPRs[] = {
    RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
    RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
    return false;
  }

  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
  if (ValVT == MVT::f32) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::BCvt;
  }

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI.
  if (XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
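  // For example, an i128 argument on RV32 legalises to four XLen parts, more
  // than the two parts handled directly above, so it is passed indirectly.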
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  if (ValVT == MVT::f32) {
    LocVT = MVT::f32;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;

  unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}

static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument, which is a value
    // needed by VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto IsVarArg = CLI.IsVarArg;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt functions with varargs.
  if (IsVarArg)
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign in
  // ArgLocs is marked CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
                                                   ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.


  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else // "machine": use a machine-mode return.
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
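  // Illustrative sketch (not from the original source): the 'r' constraint
  // handled below is what inline assembly such as
  //   asm ("add %0, %1, %2" : "=r"(Res) : "r"(A), "r"(B));
  // uses to request general-purpose registers for its operands.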

  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // Sub-word atomics (8- and 16-bit) are expanded to masked operations on the
  // containing aligned 32-bit word.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
  case AtomicRMWInst::Add:
    return Intrinsic::riscv_masked_atomicrmw_add_i32;
  case AtomicRMWInst::Sub:
    return Intrinsic::riscv_masked_atomicrmw_sub_i32;
  case AtomicRMWInst::Nand:
    return Intrinsic::riscv_masked_atomicrmw_nand_i32;
  case AtomicRMWInst::Max:
    return Intrinsic::riscv_masked_atomicrmw_max_i32;
  case AtomicRMWInst::Min:
    return Intrinsic::riscv_masked_atomicrmw_min_i32;
  case AtomicRMWInst::UMax:
    return Intrinsic::riscv_masked_atomicrmw_umax_i32;
  case AtomicRMWInst::UMin:
    return Intrinsic::riscv_masked_atomicrmw_umin_i32;
  }
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
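
  // Illustrative sketch (not from the original source): for IR such as
  //   %old = atomicrmw add i8* %p, i8 %v monotonic
  // AtomicExpandPass computes the aligned i32 word containing %p, a shifted
  // increment, and a mask, and this hook then emits a call to the
  // riscv_masked_atomicrmw_add_i32 intrinsic declared above, which is later
  // lowered to an LR.W/SC.W loop.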

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left- and then right-shift the value in order to
  // sign-extend it. For example, for an i8 at byte offset 1 on RV32, ShiftAmt
  // is 8 and ValWidth is 8, so the shift amount passed is 32-8-8 = 16.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt = Builder.CreateSub(
        Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
    return Builder.CreateCall(LrwOpScwLoop,
                              {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  }

  return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}