1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/ValueTypes.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/DiagnosticPrinter.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "riscv-lower" 38 39 STATISTIC(NumTailCalls, "Number of tail calls"); 40 41 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 42 const RISCVSubtarget &STI) 43 : TargetLowering(TM), Subtarget(STI) { 44 45 MVT XLenVT = Subtarget.getXLenVT(); 46 47 // Set up the register classes. 48 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 49 50 if (Subtarget.hasStdExtF()) 51 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 52 if (Subtarget.hasStdExtD()) 53 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 54 55 // Compute derived properties from the register classes. 56 computeRegisterProperties(STI.getRegisterInfo()); 57 58 setStackPointerRegisterToSaveRestore(RISCV::X2); 59 60 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 61 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 62 63 // TODO: add all necessary setOperationAction calls. 
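  // Illustrative summary (not exhaustive): an action registered here tells
  // the legalizer what to do with a node the base ISA cannot match directly.
  // Promote widens the type (e.g. the extending i1 load above becomes a load
  // of a wider integer type), Expand rewrites the node in terms of simpler
  // ones (e.g. SELECT_CC is turned into SETCC + SELECT), and Custom defers
  // to LowerOperation further down in this file.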
64 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 65 66 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 67 setOperationAction(ISD::BR_CC, XLenVT, Expand); 68 setOperationAction(ISD::SELECT, XLenVT, Custom); 69 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 70 71 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 72 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 73 74 setOperationAction(ISD::VASTART, MVT::Other, Custom); 75 setOperationAction(ISD::VAARG, MVT::Other, Expand); 76 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 77 setOperationAction(ISD::VAEND, MVT::Other, Expand); 78 79 for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) 80 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 81 82 if (Subtarget.is64Bit()) { 83 setTargetDAGCombine(ISD::SHL); 84 setTargetDAGCombine(ISD::SRL); 85 setTargetDAGCombine(ISD::SRA); 86 setTargetDAGCombine(ISD::ANY_EXTEND); 87 } 88 89 if (!Subtarget.hasStdExtM()) { 90 setOperationAction(ISD::MUL, XLenVT, Expand); 91 setOperationAction(ISD::MULHS, XLenVT, Expand); 92 setOperationAction(ISD::MULHU, XLenVT, Expand); 93 setOperationAction(ISD::SDIV, XLenVT, Expand); 94 setOperationAction(ISD::UDIV, XLenVT, Expand); 95 setOperationAction(ISD::SREM, XLenVT, Expand); 96 setOperationAction(ISD::UREM, XLenVT, Expand); 97 } 98 99 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 100 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 101 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 102 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 103 104 setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); 105 setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); 106 setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); 107 108 setOperationAction(ISD::ROTL, XLenVT, Expand); 109 setOperationAction(ISD::ROTR, XLenVT, Expand); 110 setOperationAction(ISD::BSWAP, XLenVT, Expand); 111 setOperationAction(ISD::CTTZ, XLenVT, Expand); 112 setOperationAction(ISD::CTLZ, XLenVT, Expand); 113 setOperationAction(ISD::CTPOP, XLenVT, Expand); 114 115 ISD::CondCode FPCCToExtend[] = { 116 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, 117 ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, 118 ISD::SETGT, ISD::SETGE, ISD::SETNE}; 119 120 ISD::NodeType FPOpToExtend[] = { 121 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; 122 123 if (Subtarget.hasStdExtF()) { 124 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 125 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 126 for (auto CC : FPCCToExtend) 127 setCondCodeAction(CC, MVT::f32, Expand); 128 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 129 setOperationAction(ISD::SELECT, MVT::f32, Custom); 130 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 131 for (auto Op : FPOpToExtend) 132 setOperationAction(Op, MVT::f32, Expand); 133 } 134 135 if (Subtarget.hasStdExtD()) { 136 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 137 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 138 for (auto CC : FPCCToExtend) 139 setCondCodeAction(CC, MVT::f64, Expand); 140 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 141 setOperationAction(ISD::SELECT, MVT::f64, Custom); 142 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 143 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 144 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 145 for (auto Op : FPOpToExtend) 146 setOperationAction(Op, MVT::f64, Expand); 147 } 148 149 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 150 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 151 
setOperationAction(ISD::ConstantPool, XLenVT, Custom); 152 153 if (Subtarget.hasStdExtA()) { 154 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 155 setMinCmpXchgSizeInBits(32); 156 } else { 157 setMaxAtomicSizeInBitsSupported(0); 158 } 159 160 setBooleanContents(ZeroOrOneBooleanContent); 161 162 // Function alignments (log2). 163 unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; 164 setMinFunctionAlignment(FunctionAlignment); 165 setPrefFunctionAlignment(FunctionAlignment); 166 167 // Effectively disable jump table generation. 168 setMinimumJumpTableEntries(INT_MAX); 169 } 170 171 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 172 EVT VT) const { 173 if (!VT.isVector()) 174 return getPointerTy(DL); 175 return VT.changeVectorElementTypeToInteger(); 176 } 177 178 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 179 const CallInst &I, 180 MachineFunction &MF, 181 unsigned Intrinsic) const { 182 switch (Intrinsic) { 183 default: 184 return false; 185 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 186 case Intrinsic::riscv_masked_atomicrmw_add_i32: 187 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 188 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 189 case Intrinsic::riscv_masked_atomicrmw_max_i32: 190 case Intrinsic::riscv_masked_atomicrmw_min_i32: 191 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 192 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 193 case Intrinsic::riscv_masked_cmpxchg_i32: 194 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 195 Info.opc = ISD::INTRINSIC_W_CHAIN; 196 Info.memVT = MVT::getVT(PtrTy->getElementType()); 197 Info.ptrVal = I.getArgOperand(0); 198 Info.offset = 0; 199 Info.align = 4; 200 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 201 MachineMemOperand::MOVolatile; 202 return true; 203 } 204 } 205 206 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 207 const AddrMode &AM, Type *Ty, 208 unsigned AS, 209 Instruction *I) const { 210 // No global is ever allowed as a base. 211 if (AM.BaseGV) 212 return false; 213 214 // Require a 12-bit signed offset. 215 if (!isInt<12>(AM.BaseOffs)) 216 return false; 217 218 switch (AM.Scale) { 219 case 0: // "r+i" or just "i", depending on HasBaseReg. 220 break; 221 case 1: 222 if (!AM.HasBaseReg) // allow "r+i". 223 break; 224 return false; // disallow "r+r" or "r+r+i". 225 default: 226 return false; 227 } 228 229 return true; 230 } 231 232 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 233 return isInt<12>(Imm); 234 } 235 236 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 237 return isInt<12>(Imm); 238 } 239 240 // On RV32, 64-bit integers are split into their high and low parts and held 241 // in two different registers, so the trunc is free since the low register can 242 // just be used. 
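// For example (RV32, illustrative only): given
//   %r = trunc i64 %x to i32
// the i64 value already lives as a lo/hi register pair, so the truncate
// simply forwards the low register and no instruction is needed.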
243 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 244 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 245 return false; 246 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 247 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 248 return (SrcBits == 64 && DestBits == 32); 249 } 250 251 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 252 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 253 !SrcVT.isInteger() || !DstVT.isInteger()) 254 return false; 255 unsigned SrcBits = SrcVT.getSizeInBits(); 256 unsigned DestBits = DstVT.getSizeInBits(); 257 return (SrcBits == 64 && DestBits == 32); 258 } 259 260 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 261 // Zexts are free if they can be combined with a load. 262 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 263 EVT MemVT = LD->getMemoryVT(); 264 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 265 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 266 (LD->getExtensionType() == ISD::NON_EXTLOAD || 267 LD->getExtensionType() == ISD::ZEXTLOAD)) 268 return true; 269 } 270 271 return TargetLowering::isZExtFree(Val, VT2); 272 } 273 274 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 275 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 276 } 277 278 // Changes the condition code and swaps operands if necessary, so the SetCC 279 // operation matches one of the comparisons supported directly in the RISC-V 280 // ISA. 281 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 282 switch (CC) { 283 default: 284 break; 285 case ISD::SETGT: 286 case ISD::SETLE: 287 case ISD::SETUGT: 288 case ISD::SETULE: 289 CC = ISD::getSetCCSwappedOperands(CC); 290 std::swap(LHS, RHS); 291 break; 292 } 293 } 294 295 // Return the RISC-V branch opcode that matches the given DAG integer 296 // condition code. The CondCode must be one of those supported by the RISC-V 297 // ISA (see normaliseSetCC). 
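// Illustrative example of the two-step mapping (a sketch, not actual
// compiler output): (setcc a, b, setgt) has no matching branch, so
// normaliseSetCC rewrites it to (setcc b, a, setlt); SETLT then maps to BLT
// below, and the final branch is "blt b, a, <target>".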
298 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 299 switch (CC) { 300 default: 301 llvm_unreachable("Unsupported CondCode"); 302 case ISD::SETEQ: 303 return RISCV::BEQ; 304 case ISD::SETNE: 305 return RISCV::BNE; 306 case ISD::SETLT: 307 return RISCV::BLT; 308 case ISD::SETGE: 309 return RISCV::BGE; 310 case ISD::SETULT: 311 return RISCV::BLTU; 312 case ISD::SETUGE: 313 return RISCV::BGEU; 314 } 315 } 316 317 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 318 SelectionDAG &DAG) const { 319 switch (Op.getOpcode()) { 320 default: 321 report_fatal_error("unimplemented operand"); 322 case ISD::GlobalAddress: 323 return lowerGlobalAddress(Op, DAG); 324 case ISD::BlockAddress: 325 return lowerBlockAddress(Op, DAG); 326 case ISD::ConstantPool: 327 return lowerConstantPool(Op, DAG); 328 case ISD::SELECT: 329 return lowerSELECT(Op, DAG); 330 case ISD::VASTART: 331 return lowerVASTART(Op, DAG); 332 case ISD::FRAMEADDR: 333 return lowerFRAMEADDR(Op, DAG); 334 case ISD::RETURNADDR: 335 return lowerRETURNADDR(Op, DAG); 336 } 337 } 338 339 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 340 SelectionDAG &DAG) const { 341 SDLoc DL(Op); 342 EVT Ty = Op.getValueType(); 343 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 344 const GlobalValue *GV = N->getGlobal(); 345 int64_t Offset = N->getOffset(); 346 MVT XLenVT = Subtarget.getXLenVT(); 347 348 if (isPositionIndependent()) 349 report_fatal_error("Unable to lowerGlobalAddress"); 350 // In order to maximise the opportunity for common subexpression elimination, 351 // emit a separate ADD node for the global address offset instead of folding 352 // it in the global address node. Later peephole optimisations may choose to 353 // fold it back in when profitable. 354 SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); 355 SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); 356 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); 357 SDValue MNLo = 358 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); 359 if (Offset != 0) 360 return DAG.getNode(ISD::ADD, DL, Ty, MNLo, 361 DAG.getConstant(Offset, DL, XLenVT)); 362 return MNLo; 363 } 364 365 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 366 SelectionDAG &DAG) const { 367 SDLoc DL(Op); 368 EVT Ty = Op.getValueType(); 369 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 370 const BlockAddress *BA = N->getBlockAddress(); 371 int64_t Offset = N->getOffset(); 372 373 if (isPositionIndependent()) 374 report_fatal_error("Unable to lowerBlockAddress"); 375 376 SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); 377 SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); 378 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); 379 SDValue MNLo = 380 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); 381 return MNLo; 382 } 383 384 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 385 SelectionDAG &DAG) const { 386 SDLoc DL(Op); 387 EVT Ty = Op.getValueType(); 388 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 389 const Constant *CPA = N->getConstVal(); 390 int64_t Offset = N->getOffset(); 391 unsigned Alignment = N->getAlignment(); 392 393 if (!isPositionIndependent()) { 394 SDValue CPAHi = 395 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); 396 SDValue CPALo = 397 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); 398 SDValue MNHi = 
SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); 399 SDValue MNLo = 400 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); 401 return MNLo; 402 } else { 403 report_fatal_error("Unable to lowerConstantPool"); 404 } 405 } 406 407 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 408 SDValue CondV = Op.getOperand(0); 409 SDValue TrueV = Op.getOperand(1); 410 SDValue FalseV = Op.getOperand(2); 411 SDLoc DL(Op); 412 MVT XLenVT = Subtarget.getXLenVT(); 413 414 // If the result type is XLenVT and CondV is the output of a SETCC node 415 // which also operated on XLenVT inputs, then merge the SETCC node into the 416 // lowered RISCVISD::SELECT_CC to take advantage of the integer 417 // compare+branch instructions. i.e.: 418 // (select (setcc lhs, rhs, cc), truev, falsev) 419 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 420 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 421 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 422 SDValue LHS = CondV.getOperand(0); 423 SDValue RHS = CondV.getOperand(1); 424 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 425 ISD::CondCode CCVal = CC->get(); 426 427 normaliseSetCC(LHS, RHS, CCVal); 428 429 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 430 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 431 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 432 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 433 } 434 435 // Otherwise: 436 // (select condv, truev, falsev) 437 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 438 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 439 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 440 441 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 442 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 443 444 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 445 } 446 447 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 448 MachineFunction &MF = DAG.getMachineFunction(); 449 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 450 451 SDLoc DL(Op); 452 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 453 getPointerTy(MF.getDataLayout())); 454 455 // vastart just stores the address of the VarArgsFrameIndex slot into the 456 // memory location argument. 
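  // Illustrative note: on RISC-V the va_list is a single pointer, so this
  // amounts to one XLEN-sized store that writes the address of the vararg
  // save area (FI above) into the object passed to @llvm.va_start; the save
  // area itself is set up in LowerFormalArguments.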
457 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 458 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 459 MachinePointerInfo(SV)); 460 } 461 462 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 463 SelectionDAG &DAG) const { 464 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 465 MachineFunction &MF = DAG.getMachineFunction(); 466 MachineFrameInfo &MFI = MF.getFrameInfo(); 467 MFI.setFrameAddressIsTaken(true); 468 unsigned FrameReg = RI.getFrameRegister(MF); 469 int XLenInBytes = Subtarget.getXLen() / 8; 470 471 EVT VT = Op.getValueType(); 472 SDLoc DL(Op); 473 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 474 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 475 while (Depth--) { 476 int Offset = -(XLenInBytes * 2); 477 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 478 DAG.getIntPtrConstant(Offset, DL)); 479 FrameAddr = 480 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 481 } 482 return FrameAddr; 483 } 484 485 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 486 SelectionDAG &DAG) const { 487 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 488 MachineFunction &MF = DAG.getMachineFunction(); 489 MachineFrameInfo &MFI = MF.getFrameInfo(); 490 MFI.setReturnAddressIsTaken(true); 491 MVT XLenVT = Subtarget.getXLenVT(); 492 int XLenInBytes = Subtarget.getXLen() / 8; 493 494 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 495 return SDValue(); 496 497 EVT VT = Op.getValueType(); 498 SDLoc DL(Op); 499 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 500 if (Depth) { 501 int Off = -XLenInBytes; 502 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 503 SDValue Offset = DAG.getConstant(Off, DL, VT); 504 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 505 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 506 MachinePointerInfo()); 507 } 508 509 // Return the value of the return address register, marking it an implicit 510 // live-in. 511 unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 512 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 513 } 514 515 // Return true if the given node is a shift with a non-constant shift amount. 516 static bool isVariableShift(SDValue Val) { 517 switch (Val.getOpcode()) { 518 default: 519 return false; 520 case ISD::SHL: 521 case ISD::SRA: 522 case ISD::SRL: 523 return Val.getOperand(1).getOpcode() != ISD::Constant; 524 } 525 } 526 527 // Returns true if the given node is an sdiv, udiv, or urem with non-constant 528 // operands. 
529 static bool isVariableSDivUDivURem(SDValue Val) { 530 switch (Val.getOpcode()) { 531 default: 532 return false; 533 case ISD::SDIV: 534 case ISD::UDIV: 535 case ISD::UREM: 536 return Val.getOperand(0).getOpcode() != ISD::Constant && 537 Val.getOperand(1).getOpcode() != ISD::Constant; 538 } 539 } 540 541 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 542 DAGCombinerInfo &DCI) const { 543 SelectionDAG &DAG = DCI.DAG; 544 545 switch (N->getOpcode()) { 546 default: 547 break; 548 case ISD::SHL: 549 case ISD::SRL: 550 case ISD::SRA: { 551 assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only"); 552 if (!DCI.isBeforeLegalize()) 553 break; 554 SDValue RHS = N->getOperand(1); 555 if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant || 556 (RHS->getOpcode() == ISD::AssertZext && 557 cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5)) 558 break; 559 SDValue LHS = N->getOperand(0); 560 SDLoc DL(N); 561 SDValue NewRHS = 562 DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS, 563 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5))); 564 return DCI.CombineTo( 565 N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS)); 566 } 567 case ISD::ANY_EXTEND: { 568 // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64, 569 // then instead sign-extend in order to increase the chance of being able 570 // to select the sllw/srlw/sraw/divw/divuw/remuw instructions. 571 SDValue Src = N->getOperand(0); 572 if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32) 573 break; 574 if (!isVariableShift(Src) && 575 !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src))) 576 break; 577 SDLoc DL(N); 578 // Don't add the new node to the DAGCombiner worklist, in order to avoid 579 // an infinite cycle due to SimplifyDemandedBits converting the 580 // SIGN_EXTEND back to ANY_EXTEND. 581 return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src), 582 false); 583 } 584 case RISCVISD::SplitF64: { 585 // If the input to SplitF64 is just BuildPairF64 then the operation is 586 // redundant. Instead, use BuildPairF64's operands directly. 
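    // i.e. the rewrite performed below is (illustrative notation):
    //   (SplitF64 (BuildPairF64 lo, hi)) -> lo, hi
    // with the two results of the SplitF64 node replaced by the original
    // operands.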
587 SDValue Op0 = N->getOperand(0); 588 if (Op0->getOpcode() != RISCVISD::BuildPairF64) 589 break; 590 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 591 } 592 } 593 594 return SDValue(); 595 } 596 597 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 598 MachineBasicBlock *BB) { 599 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 600 601 MachineFunction &MF = *BB->getParent(); 602 DebugLoc DL = MI.getDebugLoc(); 603 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 604 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 605 unsigned LoReg = MI.getOperand(0).getReg(); 606 unsigned HiReg = MI.getOperand(1).getReg(); 607 unsigned SrcReg = MI.getOperand(2).getReg(); 608 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 609 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 610 611 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 612 RI); 613 MachineMemOperand *MMO = 614 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 615 MachineMemOperand::MOLoad, 8, 8); 616 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 617 .addFrameIndex(FI) 618 .addImm(0) 619 .addMemOperand(MMO); 620 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 621 .addFrameIndex(FI) 622 .addImm(4) 623 .addMemOperand(MMO); 624 MI.eraseFromParent(); // The pseudo instruction is gone now. 625 return BB; 626 } 627 628 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 629 MachineBasicBlock *BB) { 630 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 631 "Unexpected instruction"); 632 633 MachineFunction &MF = *BB->getParent(); 634 DebugLoc DL = MI.getDebugLoc(); 635 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 636 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 637 unsigned DstReg = MI.getOperand(0).getReg(); 638 unsigned LoReg = MI.getOperand(1).getReg(); 639 unsigned HiReg = MI.getOperand(2).getReg(); 640 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 641 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 642 643 MachineMemOperand *MMO = 644 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 645 MachineMemOperand::MOStore, 8, 8); 646 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 647 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 648 .addFrameIndex(FI) 649 .addImm(0) 650 .addMemOperand(MMO); 651 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 652 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 653 .addFrameIndex(FI) 654 .addImm(4) 655 .addMemOperand(MMO); 656 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 657 MI.eraseFromParent(); // The pseudo instruction is gone now. 658 return BB; 659 } 660 661 MachineBasicBlock * 662 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 663 MachineBasicBlock *BB) const { 664 switch (MI.getOpcode()) { 665 default: 666 llvm_unreachable("Unexpected instr type to insert"); 667 case RISCV::Select_GPR_Using_CC_GPR: 668 case RISCV::Select_FPR32_Using_CC_GPR: 669 case RISCV::Select_FPR64_Using_CC_GPR: 670 break; 671 case RISCV::BuildPairF64Pseudo: 672 return emitBuildPairF64Pseudo(MI, BB); 673 case RISCV::SplitF64Pseudo: 674 return emitSplitF64Pseudo(MI, BB); 675 } 676 677 // To "insert" a SELECT instruction, we actually have to insert the triangle 678 // control-flow pattern. 
The incoming instruction knows the destination vreg 679 // to set, the condition code register to branch on, the true/false values to 680 // select between, and the condcode to use to select the appropriate branch. 681 // 682 // We produce the following control flow: 683 // HeadMBB 684 // | \ 685 // | IfFalseMBB 686 // | / 687 // TailMBB 688 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 689 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 690 DebugLoc DL = MI.getDebugLoc(); 691 MachineFunction::iterator I = ++BB->getIterator(); 692 693 MachineBasicBlock *HeadMBB = BB; 694 MachineFunction *F = BB->getParent(); 695 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 696 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 697 698 F->insert(I, IfFalseMBB); 699 F->insert(I, TailMBB); 700 // Move all remaining instructions to TailMBB. 701 TailMBB->splice(TailMBB->begin(), HeadMBB, 702 std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end()); 703 // Update machine-CFG edges by transferring all successors of the current 704 // block to the new block which will contain the Phi node for the select. 705 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 706 // Set the successors for HeadMBB. 707 HeadMBB->addSuccessor(IfFalseMBB); 708 HeadMBB->addSuccessor(TailMBB); 709 710 // Insert appropriate branch. 711 unsigned LHS = MI.getOperand(1).getReg(); 712 unsigned RHS = MI.getOperand(2).getReg(); 713 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 714 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 715 716 BuildMI(HeadMBB, DL, TII.get(Opcode)) 717 .addReg(LHS) 718 .addReg(RHS) 719 .addMBB(TailMBB); 720 721 // IfFalseMBB just falls through to TailMBB. 722 IfFalseMBB->addSuccessor(TailMBB); 723 724 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 725 BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), 726 MI.getOperand(0).getReg()) 727 .addReg(MI.getOperand(4).getReg()) 728 .addMBB(HeadMBB) 729 .addReg(MI.getOperand(5).getReg()) 730 .addMBB(IfFalseMBB); 731 732 MI.eraseFromParent(); // The pseudo instruction is gone now. 733 return TailMBB; 734 } 735 736 // Calling Convention Implementation. 737 // The expectations for frontend ABI lowering vary from target to target. 738 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 739 // details, but this is a longer term goal. For now, we simply try to keep the 740 // role of the frontend as simple and well-defined as possible. The rules can 741 // be summarised as: 742 // * Never split up large scalar arguments. We handle them here. 743 // * If a hardfloat calling convention is being used, and the struct may be 744 // passed in a pair of registers (fp+fp, int+fp), and both registers are 745 // available, then pass as two separate arguments. If either the GPRs or FPRs 746 // are exhausted, then pass according to the rule below. 747 // * If a struct could never be passed in registers or directly in a stack 748 // slot (as it is larger than 2*XLEN and the floating point rules don't 749 // apply), then pass it using a pointer with the byval attribute. 750 // * If a struct is less than 2*XLEN, then coerce to either a two-element 751 // word-sized array or a 2*XLEN scalar (depending on alignment). 752 // * The frontend can determine whether a struct is returned by reference or 753 // not based on its size and fields. 
If it will be returned by reference, the 754 // frontend must modify the prototype so a pointer with the sret annotation is 755 // passed as the first argument. This is not necessary for large scalar 756 // returns. 757 // * Struct return values and varargs should be coerced to structs containing 758 // register-size fields in the same situations they would be for fixed 759 // arguments. 760 761 static const MCPhysReg ArgGPRs[] = { 762 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 763 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 764 }; 765 766 // Pass a 2*XLEN argument that has been split into two XLEN values through 767 // registers or the stack as necessary. 768 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 769 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 770 MVT ValVT2, MVT LocVT2, 771 ISD::ArgFlagsTy ArgFlags2) { 772 unsigned XLenInBytes = XLen / 8; 773 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 774 // At least one half can be passed via register. 775 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 776 VA1.getLocVT(), CCValAssign::Full)); 777 } else { 778 // Both halves must be passed on the stack, with proper alignment. 779 unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); 780 State.addLoc( 781 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 782 State.AllocateStack(XLenInBytes, StackAlign), 783 VA1.getLocVT(), CCValAssign::Full)); 784 State.addLoc(CCValAssign::getMem( 785 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 786 CCValAssign::Full)); 787 return false; 788 } 789 790 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 791 // The second half can also be passed via register. 792 State.addLoc( 793 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 794 } else { 795 // The second half is passed via the stack, without additional alignment. 796 State.addLoc(CCValAssign::getMem( 797 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 798 CCValAssign::Full)); 799 } 800 801 return false; 802 } 803 804 // Implements the RISC-V calling convention. Returns true upon failure. 805 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, 806 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 807 CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { 808 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 809 assert(XLen == 32 || XLen == 64); 810 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 811 if (ValVT == MVT::f32) { 812 LocVT = MVT::i32; 813 LocInfo = CCValAssign::BCvt; 814 } 815 816 // Any return value split in to more than two values can't be returned 817 // directly. 818 if (IsRet && ValNo > 1) 819 return true; 820 821 // If this is a variadic argument, the RISC-V calling convention requires 822 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 823 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 824 // be used regardless of whether the original argument was split during 825 // legalisation or not. The argument will not be passed by registers if the 826 // original type is larger than 2*XLEN, so the register alignment rule does 827 // not apply. 828 unsigned TwoXLenInBytes = (2 * XLen) / 8; 829 if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && 830 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 831 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 832 // Skip 'odd' register if necessary. 
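    // Illustrative example (assuming the RV32 integer/soft-float calling
    // convention): for a call such as printf("%f", 1.0) the format string
    // takes a0, leaving a1 as the next candidate; the variadic double needs
    // an aligned even/odd pair, so a1 is skipped and the value is passed in
    // a2/a3.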
833 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 834 State.AllocateReg(ArgGPRs); 835 } 836 837 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 838 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 839 State.getPendingArgFlags(); 840 841 assert(PendingLocs.size() == PendingArgFlags.size() && 842 "PendingLocs and PendingArgFlags out of sync"); 843 844 // Handle passing f64 on RV32D with a soft float ABI. 845 if (XLen == 32 && ValVT == MVT::f64) { 846 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 847 "Can't lower f64 if it is split"); 848 // Depending on available argument GPRS, f64 may be passed in a pair of 849 // GPRs, split between a GPR and the stack, or passed completely on the 850 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 851 // cases. 852 unsigned Reg = State.AllocateReg(ArgGPRs); 853 LocVT = MVT::i32; 854 if (!Reg) { 855 unsigned StackOffset = State.AllocateStack(8, 8); 856 State.addLoc( 857 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 858 return false; 859 } 860 if (!State.AllocateReg(ArgGPRs)) 861 State.AllocateStack(4, 4); 862 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 863 return false; 864 } 865 866 // Split arguments might be passed indirectly, so keep track of the pending 867 // values. 868 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 869 LocVT = XLenVT; 870 LocInfo = CCValAssign::Indirect; 871 PendingLocs.push_back( 872 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 873 PendingArgFlags.push_back(ArgFlags); 874 if (!ArgFlags.isSplitEnd()) { 875 return false; 876 } 877 } 878 879 // If the split argument only had two elements, it should be passed directly 880 // in registers or on the stack. 881 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 882 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 883 // Apply the normal calling convention rules to the first half of the 884 // split argument. 885 CCValAssign VA = PendingLocs[0]; 886 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 887 PendingLocs.clear(); 888 PendingArgFlags.clear(); 889 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 890 ArgFlags); 891 } 892 893 // Allocate to a register if possible, or else a stack slot. 894 unsigned Reg = State.AllocateReg(ArgGPRs); 895 unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); 896 897 // If we reach this point and PendingLocs is non-empty, we must be at the 898 // end of a split argument that must be passed indirectly. 
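  // For example, an i128 argument on RV32 is legalised into four i32 parts,
  // so it cannot be passed directly; every pending part is given the same
  // location below, which will hold the address of an in-memory copy of the
  // whole value (see the Indirect handling in LowerCall and
  // LowerFormalArguments). Illustrative summary only.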
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32) {
    LocVT = MVT::f32;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
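// For example (illustrative): an f32 passed in a GPR arrives as an i32
// (CCValAssign::BCvt) and is bitcast back to f32 by convertLocVTToValVT
// above, whereas an Indirect value arrives as just a pointer, which the
// helpers below return unchanged for their caller to load through.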
993 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 994 const CCValAssign &VA, const SDLoc &DL) { 995 MachineFunction &MF = DAG.getMachineFunction(); 996 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 997 EVT LocVT = VA.getLocVT(); 998 SDValue Val; 999 1000 unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1001 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1002 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1003 1004 if (VA.getLocInfo() == CCValAssign::Indirect) 1005 return Val; 1006 1007 return convertLocVTToValVT(DAG, Val, VA, DL); 1008 } 1009 1010 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 1011 const CCValAssign &VA, const SDLoc &DL) { 1012 EVT LocVT = VA.getLocVT(); 1013 1014 switch (VA.getLocInfo()) { 1015 default: 1016 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1017 case CCValAssign::Full: 1018 break; 1019 case CCValAssign::BCvt: 1020 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 1021 break; 1022 } 1023 return Val; 1024 } 1025 1026 // The caller is responsible for loading the full value if the argument is 1027 // passed with CCValAssign::Indirect. 1028 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 1029 const CCValAssign &VA, const SDLoc &DL) { 1030 MachineFunction &MF = DAG.getMachineFunction(); 1031 MachineFrameInfo &MFI = MF.getFrameInfo(); 1032 EVT LocVT = VA.getLocVT(); 1033 EVT ValVT = VA.getValVT(); 1034 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 1035 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1036 VA.getLocMemOffset(), /*Immutable=*/true); 1037 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1038 SDValue Val; 1039 1040 ISD::LoadExtType ExtType; 1041 switch (VA.getLocInfo()) { 1042 default: 1043 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1044 case CCValAssign::Full: 1045 case CCValAssign::Indirect: 1046 ExtType = ISD::NON_EXTLOAD; 1047 break; 1048 } 1049 Val = DAG.getExtLoad( 1050 ExtType, DL, LocVT, Chain, FIN, 1051 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 1052 return Val; 1053 } 1054 1055 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 1056 const CCValAssign &VA, const SDLoc &DL) { 1057 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 1058 "Unexpected VA"); 1059 MachineFunction &MF = DAG.getMachineFunction(); 1060 MachineFrameInfo &MFI = MF.getFrameInfo(); 1061 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1062 1063 if (VA.isMemLoc()) { 1064 // f64 is passed on the stack. 1065 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 1066 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1067 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 1068 MachinePointerInfo::getFixedStack(MF, FI)); 1069 } 1070 1071 assert(VA.isRegLoc() && "Expected register VA assignment"); 1072 1073 unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1074 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 1075 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 1076 SDValue Hi; 1077 if (VA.getLocReg() == RISCV::X17) { 1078 // Second half of f64 is passed on the stack. 1079 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 1080 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1081 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 1082 MachinePointerInfo::getFixedStack(MF, FI)); 1083 } else { 1084 // Second half of f64 is passed in another GPR. 
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
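      // For example (illustrative, i128 on RV32): ArgValue holds the address
      // of the in-memory value; the first i32 part is loaded from offset 0
      // and the loop below loads the remaining parts from ArgValue +
      // PartOffset (4, 8 and 12), so that InVals lines up with Ins.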
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument,
    // which is needed by VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
1235 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 1236 bool RISCVTargetLowering::IsEligibleForTailCallOptimization( 1237 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 1238 const SmallVector<CCValAssign, 16> &ArgLocs) const { 1239 1240 auto &Callee = CLI.Callee; 1241 auto CalleeCC = CLI.CallConv; 1242 auto IsVarArg = CLI.IsVarArg; 1243 auto &Outs = CLI.Outs; 1244 auto &Caller = MF.getFunction(); 1245 auto CallerCC = Caller.getCallingConv(); 1246 1247 // Do not tail call opt functions with "disable-tail-calls" attribute. 1248 if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") 1249 return false; 1250 1251 // Exception-handling functions need a special set of instructions to 1252 // indicate a return to the hardware. Tail-calling another function would 1253 // probably break this. 1254 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 1255 // should be expanded as new function attributes are introduced. 1256 if (Caller.hasFnAttribute("interrupt")) 1257 return false; 1258 1259 // Do not tail call opt functions with varargs. 1260 if (IsVarArg) 1261 return false; 1262 1263 // Do not tail call opt if the stack is used to pass parameters. 1264 if (CCInfo.getNextStackOffset() != 0) 1265 return false; 1266 1267 // Do not tail call opt if any parameters need to be passed indirectly. 1268 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 1269 // passed indirectly. So the address of the value will be passed in a 1270 // register, or if not available, then the address is put on the stack. In 1271 // order to pass indirectly, space on the stack often needs to be allocated 1272 // in order to store the value. In this case the CCInfo.getNextStackOffset() 1273 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 1274 // are passed CCValAssign::Indirect. 1275 for (auto &VA : ArgLocs) 1276 if (VA.getLocInfo() == CCValAssign::Indirect) 1277 return false; 1278 1279 // Do not tail call opt if either caller or callee uses struct return 1280 // semantics. 1281 auto IsCallerStructRet = Caller.hasStructRetAttr(); 1282 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 1283 if (IsCallerStructRet || IsCalleeStructRet) 1284 return false; 1285 1286 // Externally-defined functions with weak linkage should not be 1287 // tail-called. The behaviour of branch instructions in this situation (as 1288 // used for tail calls) is implementation-defined, so we cannot rely on the 1289 // linker replacing the tail call with a return. 1290 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1291 const GlobalValue *GV = G->getGlobal(); 1292 if (GV->hasExternalWeakLinkage()) 1293 return false; 1294 } 1295 1296 // The callee has to preserve all registers the caller needs to preserve. 1297 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1298 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 1299 if (CalleeCC != CallerCC) { 1300 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 1301 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 1302 return false; 1303 } 1304 1305 // Byval parameters hand the function a pointer directly into the stack area 1306 // we want to reuse during a tail call. Working around this *is* possible 1307 // but less efficient and uglier in LowerCall. 
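  // Illustrative example: a call such as "return g(x);" with all arguments
  // in registers, no varargs, no byval and no sret typically passes every
  // check in this function and is emitted as a tail call, whereas a call
  // whose arguments spill to the stack falls back to a normal call.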
1308 for (auto &Arg : Outs) 1309 if (Arg.Flags.isByVal()) 1310 return false; 1311 1312 return true; 1313 } 1314 1315 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 1316 // and output parameter nodes. 1317 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 1318 SmallVectorImpl<SDValue> &InVals) const { 1319 SelectionDAG &DAG = CLI.DAG; 1320 SDLoc &DL = CLI.DL; 1321 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1322 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1323 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1324 SDValue Chain = CLI.Chain; 1325 SDValue Callee = CLI.Callee; 1326 bool &IsTailCall = CLI.IsTailCall; 1327 CallingConv::ID CallConv = CLI.CallConv; 1328 bool IsVarArg = CLI.IsVarArg; 1329 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1330 MVT XLenVT = Subtarget.getXLenVT(); 1331 1332 MachineFunction &MF = DAG.getMachineFunction(); 1333 1334 // Analyze the operands of the call, assigning locations to each operand. 1335 SmallVector<CCValAssign, 16> ArgLocs; 1336 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1337 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 1338 1339 // Check if it's really possible to do a tail call. 1340 if (IsTailCall) 1341 IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, 1342 ArgLocs); 1343 1344 if (IsTailCall) 1345 ++NumTailCalls; 1346 else if (CLI.CS && CLI.CS.isMustTailCall()) 1347 report_fatal_error("failed to perform tail call elimination on a call " 1348 "site marked musttail"); 1349 1350 // Get a count of how many bytes are to be pushed on the stack. 1351 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 1352 1353 // Create local copies for byval args 1354 SmallVector<SDValue, 8> ByValArgs; 1355 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 1356 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1357 if (!Flags.isByVal()) 1358 continue; 1359 1360 SDValue Arg = OutVals[i]; 1361 unsigned Size = Flags.getByValSize(); 1362 unsigned Align = Flags.getByValAlign(); 1363 1364 int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false); 1365 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1366 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 1367 1368 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, 1369 /*IsVolatile=*/false, 1370 /*AlwaysInline=*/false, 1371 IsTailCall, MachinePointerInfo(), 1372 MachinePointerInfo()); 1373 ByValArgs.push_back(FIPtr); 1374 } 1375 1376 if (!IsTailCall) 1377 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 1378 1379 // Copy argument values to their designated locations. 1380 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 1381 SmallVector<SDValue, 8> MemOpChains; 1382 SDValue StackPtr; 1383 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 1384 CCValAssign &VA = ArgLocs[i]; 1385 SDValue ArgValue = OutVals[i]; 1386 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1387 1388 // Handle passing f64 on RV32D with a soft float ABI as a special case. 
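    // Illustrative sketch of the split performed below: the f64 is broken
    // into (lo, hi) i32 halves with RISCVISD::SplitF64; if the low half is
    // assigned a0 the high half goes in a1, and if the low half lands in a7
    // (X17, the last argument GPR) the high half is instead stored to the
    // first outgoing stack slot.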
1389 bool IsF64OnRV32DSoftABI = 1390 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 1391 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 1392 SDValue SplitF64 = DAG.getNode( 1393 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 1394 SDValue Lo = SplitF64.getValue(0); 1395 SDValue Hi = SplitF64.getValue(1); 1396 1397 unsigned RegLo = VA.getLocReg(); 1398 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 1399 1400 if (RegLo == RISCV::X17) { 1401 // Second half of f64 is passed on the stack. 1402 // Work out the address of the stack slot. 1403 if (!StackPtr.getNode()) 1404 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 1405 // Emit the store. 1406 MemOpChains.push_back( 1407 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 1408 } else { 1409 // Second half of f64 is passed in another GPR. 1410 unsigned RegHigh = RegLo + 1; 1411 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 1412 } 1413 continue; 1414 } 1415 1416 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 1417 // as any other MemLoc. 1418 1419 // Promote the value if needed. 1420 // For now, only handle fully promoted and indirect arguments. 1421 if (VA.getLocInfo() == CCValAssign::Indirect) { 1422 // Store the argument in a stack slot and pass its address. 1423 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 1424 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1425 MemOpChains.push_back( 1426 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 1427 MachinePointerInfo::getFixedStack(MF, FI))); 1428 // If the original argument was split (e.g. i128), we need 1429 // to store all parts of it here (and pass just one address). 1430 unsigned ArgIndex = Outs[i].OrigArgIndex; 1431 assert(Outs[i].PartOffset == 0); 1432 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 1433 SDValue PartValue = OutVals[i + 1]; 1434 unsigned PartOffset = Outs[i + 1].PartOffset; 1435 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 1436 DAG.getIntPtrConstant(PartOffset, DL)); 1437 MemOpChains.push_back( 1438 DAG.getStore(Chain, DL, PartValue, Address, 1439 MachinePointerInfo::getFixedStack(MF, FI))); 1440 ++i; 1441 } 1442 ArgValue = SpillSlot; 1443 } else { 1444 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 1445 } 1446 1447 // Use local copy if it is a byval arg. 1448 if (Flags.isByVal()) 1449 ArgValue = ByValArgs[j++]; 1450 1451 if (VA.isRegLoc()) { 1452 // Queue up the argument copies and emit them at the end. 1453 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 1454 } else { 1455 assert(VA.isMemLoc() && "Argument not register or memory"); 1456 assert(!IsTailCall && "Tail call not allowed if stack is used " 1457 "for passing parameters"); 1458 1459 // Work out the address of the stack slot. 1460 if (!StackPtr.getNode()) 1461 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 1462 SDValue Address = 1463 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 1464 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 1465 1466 // Emit the store. 1467 MemOpChains.push_back( 1468 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 1469 } 1470 } 1471 1472 // Join the stores, which are independent of one another. 1473 if (!MemOpChains.empty()) 1474 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 1475 1476 SDValue Glue; 1477 1478 // Build a sequence of copy-to-reg nodes, chained and glued together. 
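  // The glue chain keeps these CopyToReg nodes adjacent to the call node so
  // the argument registers remain live into the call. (General SelectionDAG
  // behaviour, noted here for clarity.)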
1479 for (auto &Reg : RegsToPass) { 1480 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 1481 Glue = Chain.getValue(1); 1482 } 1483 1484 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 1485 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 1486 // split it and then direct call can be matched by PseudoCALL. 1487 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 1488 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0); 1489 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1490 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0); 1491 } 1492 1493 // The first call operand is the chain and the second is the target address. 1494 SmallVector<SDValue, 8> Ops; 1495 Ops.push_back(Chain); 1496 Ops.push_back(Callee); 1497 1498 // Add argument registers to the end of the list so that they are 1499 // known live into the call. 1500 for (auto &Reg : RegsToPass) 1501 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 1502 1503 if (!IsTailCall) { 1504 // Add a register mask operand representing the call-preserved registers. 1505 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1506 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 1507 assert(Mask && "Missing call preserved mask for calling convention"); 1508 Ops.push_back(DAG.getRegisterMask(Mask)); 1509 } 1510 1511 // Glue the call to the argument copies, if any. 1512 if (Glue.getNode()) 1513 Ops.push_back(Glue); 1514 1515 // Emit the call. 1516 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1517 1518 if (IsTailCall) { 1519 MF.getFrameInfo().setHasTailCall(); 1520 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 1521 } 1522 1523 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 1524 Glue = Chain.getValue(1); 1525 1526 // Mark the end of the call, which is glued to the call itself. 1527 Chain = DAG.getCALLSEQ_END(Chain, 1528 DAG.getConstant(NumBytes, DL, PtrVT, true), 1529 DAG.getConstant(0, DL, PtrVT, true), 1530 Glue, DL); 1531 Glue = Chain.getValue(1); 1532 1533 // Assign locations to each value returned by this call. 1534 SmallVector<CCValAssign, 16> RVLocs; 1535 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 1536 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 1537 1538 // Copy all of the result registers out of their specified physreg. 
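  // For example, under the RV32 soft-float ABI an f64 return value comes
  // back in a0/a1; the loop below copies out both halves and rebuilds the
  // f64 with RISCVISD::BuildPairF64. (Illustrative note for the special case
  // handled below.)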
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.
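  // RetOps now holds the updated chain followed by one Register node per
  // physical return register, which marks those registers live-out of the
  // function.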

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
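  // For example (illustrative only), in
  //   asm volatile("add %0, %1, %2" : "=r"(sum) : "r"(a), "r"(b));
  // each 'r' operand is allocated from GPRRegClass by the switch below.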
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
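// Note (illustrative only): for an i8/i16 cmpxchg, AtomicExpand computes the
// aligned word address, shifted compare/new values and the mask, and the
// helper above then emits a call roughly of the form
//   %res = call i32 @llvm.riscv.masked.cmpxchg.i32(<aligned ptr>, i32 %cmp,
//                                                  i32 %new, i32 %mask,
//                                                  i32 <ordering>)
// on RV32; the intrinsic is overloaded on the pointer type, so the exact
// mangled name depends on the address space.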