//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (Subtarget.is64Bit()) {
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::ANY_EXTEND);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
      ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
      ISD::SETGT,  ISD::SETGE,  ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
  }

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments (log2).
  unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 4;
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
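
// Illustrative note (not tied to any particular caller): normaliseSetCC
// rewrites (setcc a, b, setgt) as (setcc b, a, setlt), so the comparison can
// be selected with the BLT branch form below rather than a missing BGT form.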

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = N->getGlobal();
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerGlobalAddress");
  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
  SDValue MNLo =
      SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
                       DAG.getConstant(Offset, DL, XLenVT));
  return MNLo;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = N->getBlockAddress();
  int64_t Offset = N->getOffset();

  if (isPositionIndependent())
    report_fatal_error("Unable to lowerBlockAddress");

  SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
  SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
  SDValue MNLo =
      SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
  return MNLo;
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
  const Constant *CPA = N->getConstVal();
  int64_t Offset = N->getOffset();
  unsigned Alignment = N->getAlignment();

  if (!isPositionIndependent()) {
    SDValue CPAHi =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
    SDValue CPALo =
        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
    SDValue MNHi =
        SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
    SDValue MNLo =
        SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
    return MNLo;
  } else {
    report_fatal_error("Unable to lowerConstantPool");
  }
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  unsigned FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

// Return true if the given node is a shift with a non-constant shift amount.
static bool isVariableShift(SDValue Val) {
  switch (Val.getOpcode()) {
  default:
    return false;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return Val.getOperand(1).getOpcode() != ISD::Constant;
  }
}

// Returns true if the given node is an sdiv, udiv, or urem with non-constant
// operands.
static bool isVariableSDivUDivURem(SDValue Val) {
  switch (Val.getOpcode()) {
  default:
    return false;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    return Val.getOperand(0).getOpcode() != ISD::Constant &&
           Val.getOperand(1).getOpcode() != ISD::Constant;
  }
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA: {
    assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
    if (!DCI.isBeforeLegalize())
      break;
    SDValue RHS = N->getOperand(1);
    if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
        (RHS->getOpcode() == ISD::AssertZext &&
         cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
      break;
    SDValue LHS = N->getOperand(0);
    SDLoc DL(N);
    SDValue NewRHS =
        DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
                    DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
    return DCI.CombineTo(
        N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
  }
  case ISD::ANY_EXTEND: {
    // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
    // then instead sign-extend in order to increase the chance of being able
    // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
    SDValue Src = N->getOperand(0);
    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
      break;
    if (!isVariableShift(Src) &&
        !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
      break;
    SDLoc DL(N);
    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
  }
  case RISCVISD::SplitF64: {
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    SDValue Op0 = N->getOperand(0);
    if (Op0->getOpcode() != RISCVISD::BuildPairF64)
      break;
    return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
  }
  }

  return SDValue();
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned LoReg = MI.getOperand(0).getReg();
  unsigned HiReg = MI.getOperand(1).getReg();
  unsigned SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned LoReg = MI.getOperand(1).getReg();
  unsigned HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, 8);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    break;
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }

  // To "insert" a SELECT instruction, we actually have to insert the triangle
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);
  // Move all remaining instructions to TailMBB.
  TailMBB->splice(TailMBB->begin(), HeadMBB,
                  std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);
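
  // As an illustrative sketch (the virtual register names here are
  // hypothetical), a Select_GPR_Using_CC_GPR %dst, %lhs, %rhs, SETLT, %tval,
  // %fval ends up as:
  //   HeadMBB:    BLT %lhs, %rhs, TailMBB   (branch inserted below)
  //   IfFalseMBB: fallthrough only
  //   TailMBB:    %dst = PHI [ %tval, HeadMBB ], [ %fval, IfFalseMBB ]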

  // Insert appropriate branch.
  unsigned LHS = MI.getOperand(1).getReg();
  unsigned RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(4).getReg())
      .addMBB(HeadMBB)
      .addReg(MI.getOperand(5).getReg())
      .addMBB(IfFalseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return TailMBB;
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or
//   FPRs are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference,
//   the frontend must modify the prototype so a pointer with the sret
//   annotation is passed as the first argument. This is not necessary for
//   large scalar returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.

static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
    return false;
  }

  if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
        CCValAssign::Full));
  }

  return false;
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT,
                     MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
  if (ValVT == MVT::f32) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::BCvt;
  }

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI.
  if (XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
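    // As a rough illustration of those three cases (assuming the standard
    // a0-a7 argument GPRs): with two GPRs free the f64 travels as a lo/hi
    // pair of i32s; with exactly one GPR free the low word takes that
    // register and the high word takes a 4-byte stack slot; with no GPRs
    // free the whole value goes in an 8-byte, 8-byte-aligned stack slot.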
    unsigned Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, 8);
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, 4);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ArgFlags.isSplit() || !PendingLocs.empty()) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  unsigned Reg = State.AllocateReg(ArgGPRs);
  unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  if (ValVT == MVT::f32) {
    LocVT = MVT::f32;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;

  unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL);
}

static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
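    // For example (illustrative numbers only): if a0-a2 are taken by fixed
    // arguments, a3-a7 are spilled below and VarArgsSaveSize starts at
    // 5 * XLenInBytes; the extra padding slot created further down keeps the
    // save area 2*XLEN-aligned, for a final size of 6 * XLenInBytes.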
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto &Callee = CLI.Callee;
  auto CalleeCC = CLI.CallConv;
  auto IsVarArg = CLI.IsVarArg;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt functions with "disable-tail-calls" attribute.
  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt functions with varargs.
  if (IsVarArg)
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called. The behaviour of branch instructions in this situation (as
  // used for tail calls) is implementation-defined, so we cannot rely on the
  // linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
                                                   ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CS && CLI.CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          IsTailCall, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
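      // The f64 is split into two i32 halves with SplitF64; the low half is
      // returned in the assigned GPR and the high half in the next GPR
      // (e.g. a0 and a1).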
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
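  // For example, given inline asm such as
  //   asm volatile("add %0, %1, %2" : "=r"(res) : "r"(lhs), "r"(rhs));
  // the 'r' constraint below selects the general-purpose register class.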
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  // A sequentially consistent load needs a leading fence; a release (or
  // stronger) store needs a release fence before it.
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  // An acquire (or stronger) load needs an acquire fence after it.
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // 8- and 16-bit atomicrmw operations have no native AMO support; expand
  // them to a masked 32-bit LR/SC loop via the masked intrinsics.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
  case AtomicRMWInst::Add:
    return Intrinsic::riscv_masked_atomicrmw_add_i32;
  case AtomicRMWInst::Sub:
    return Intrinsic::riscv_masked_atomicrmw_sub_i32;
  case AtomicRMWInst::Nand:
    return Intrinsic::riscv_masked_atomicrmw_nand_i32;
  case AtomicRMWInst::Max:
    return Intrinsic::riscv_masked_atomicrmw_max_i32;
  case AtomicRMWInst::Min:
    return Intrinsic::riscv_masked_atomicrmw_min_i32;
  case AtomicRMWInst::UMax:
    return Intrinsic::riscv_masked_atomicrmw_umax_i32;
  case AtomicRMWInst::UMin:
    return Intrinsic::riscv_masked_atomicrmw_umin_i32;
  }
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
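  // For example, an i8 atomic at byte offset 1 on RV32 has ShiftAmt = 8 and
  // ValWidth = 8, so SextShamt = 32 - 8 - 8 = 16: shifting the loaded word
  // left and then arithmetically right by 16 sign-extends the field in bits
  // [15:8] before the signed min/max comparison.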
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt = Builder.CreateSub(
        Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
    return Builder.CreateCall(LrwOpScwLoop,
                              {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  }

  return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg = Intrinsic::getDeclaration(
      CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
  return Builder.CreateCall(MaskedCmpXchg,
                            {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
}