1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/ValueTypes.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/DiagnosticPrinter.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "riscv-lower" 38 39 STATISTIC(NumTailCalls, "Number of tail calls"); 40 41 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 42 const RISCVSubtarget &STI) 43 : TargetLowering(TM), Subtarget(STI) { 44 45 MVT XLenVT = Subtarget.getXLenVT(); 46 47 // Set up the register classes. 48 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 49 50 if (Subtarget.hasStdExtF()) 51 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 52 if (Subtarget.hasStdExtD()) 53 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 54 55 // Compute derived properties from the register classes. 56 computeRegisterProperties(STI.getRegisterInfo()); 57 58 setStackPointerRegisterToSaveRestore(RISCV::X2); 59 60 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 61 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 62 63 // TODO: add all necessary setOperationAction calls. 
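  // As a rough guide to the actions requested below (generic SelectionDAG
  // behaviour, not anything RISC-V specific): Legal operations are selected
  // as-is, Promote widens the type first, Expand lets the legaliser open-code
  // the operation or fall back to a libcall, and Custom routes the node to
  // LowerOperation/ReplaceNodeResults in this file. For example, with no M
  // extension ISD::MUL is marked Expand below, so an i32 multiply on RV32I
  // ends up as a __mulsi3 libcall.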
64 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 65 66 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 67 setOperationAction(ISD::BR_CC, XLenVT, Expand); 68 setOperationAction(ISD::SELECT, XLenVT, Custom); 69 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 70 71 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 72 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 73 74 setOperationAction(ISD::VASTART, MVT::Other, Custom); 75 setOperationAction(ISD::VAARG, MVT::Other, Expand); 76 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 77 setOperationAction(ISD::VAEND, MVT::Other, Expand); 78 79 for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) 80 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 81 82 if (Subtarget.is64Bit()) { 83 setOperationAction(ISD::SHL, MVT::i32, Custom); 84 setOperationAction(ISD::SRA, MVT::i32, Custom); 85 setOperationAction(ISD::SRL, MVT::i32, Custom); 86 } 87 88 if (!Subtarget.hasStdExtM()) { 89 setOperationAction(ISD::MUL, XLenVT, Expand); 90 setOperationAction(ISD::MULHS, XLenVT, Expand); 91 setOperationAction(ISD::MULHU, XLenVT, Expand); 92 setOperationAction(ISD::SDIV, XLenVT, Expand); 93 setOperationAction(ISD::UDIV, XLenVT, Expand); 94 setOperationAction(ISD::SREM, XLenVT, Expand); 95 setOperationAction(ISD::UREM, XLenVT, Expand); 96 } 97 98 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 99 setOperationAction(ISD::SDIV, MVT::i32, Custom); 100 setOperationAction(ISD::UDIV, MVT::i32, Custom); 101 setOperationAction(ISD::UREM, MVT::i32, Custom); 102 } 103 104 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 105 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 106 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 107 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 108 109 setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); 110 setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); 111 setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); 112 113 setOperationAction(ISD::ROTL, XLenVT, Expand); 114 setOperationAction(ISD::ROTR, XLenVT, Expand); 115 setOperationAction(ISD::BSWAP, XLenVT, Expand); 116 setOperationAction(ISD::CTTZ, XLenVT, Expand); 117 setOperationAction(ISD::CTLZ, XLenVT, Expand); 118 setOperationAction(ISD::CTPOP, XLenVT, Expand); 119 120 ISD::CondCode FPCCToExtend[] = { 121 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, 122 ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, 123 ISD::SETGT, ISD::SETGE, ISD::SETNE}; 124 125 ISD::NodeType FPOpToExtend[] = { 126 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; 127 128 if (Subtarget.hasStdExtF()) { 129 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 130 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 131 for (auto CC : FPCCToExtend) 132 setCondCodeAction(CC, MVT::f32, Expand); 133 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 134 setOperationAction(ISD::SELECT, MVT::f32, Custom); 135 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 136 for (auto Op : FPOpToExtend) 137 setOperationAction(Op, MVT::f32, Expand); 138 } 139 140 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 141 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 142 143 if (Subtarget.hasStdExtD()) { 144 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 145 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 146 for (auto CC : FPCCToExtend) 147 setCondCodeAction(CC, MVT::f64, Expand); 148 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 149 setOperationAction(ISD::SELECT, MVT::f64, Custom); 150 setOperationAction(ISD::BR_CC, MVT::f64, 
Expand); 151 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 152 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 153 for (auto Op : FPOpToExtend) 154 setOperationAction(Op, MVT::f64, Expand); 155 } 156 157 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 158 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 159 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 160 161 if (Subtarget.hasStdExtA()) { 162 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 163 setMinCmpXchgSizeInBits(32); 164 } else { 165 setMaxAtomicSizeInBitsSupported(0); 166 } 167 168 setBooleanContents(ZeroOrOneBooleanContent); 169 170 // Function alignments (log2). 171 unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; 172 setMinFunctionAlignment(FunctionAlignment); 173 setPrefFunctionAlignment(FunctionAlignment); 174 175 // Effectively disable jump table generation. 176 setMinimumJumpTableEntries(INT_MAX); 177 } 178 179 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 180 EVT VT) const { 181 if (!VT.isVector()) 182 return getPointerTy(DL); 183 return VT.changeVectorElementTypeToInteger(); 184 } 185 186 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 187 const CallInst &I, 188 MachineFunction &MF, 189 unsigned Intrinsic) const { 190 switch (Intrinsic) { 191 default: 192 return false; 193 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 194 case Intrinsic::riscv_masked_atomicrmw_add_i32: 195 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 196 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 197 case Intrinsic::riscv_masked_atomicrmw_max_i32: 198 case Intrinsic::riscv_masked_atomicrmw_min_i32: 199 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 200 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 201 case Intrinsic::riscv_masked_cmpxchg_i32: 202 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 203 Info.opc = ISD::INTRINSIC_W_CHAIN; 204 Info.memVT = MVT::getVT(PtrTy->getElementType()); 205 Info.ptrVal = I.getArgOperand(0); 206 Info.offset = 0; 207 Info.align = 4; 208 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 209 MachineMemOperand::MOVolatile; 210 return true; 211 } 212 } 213 214 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 215 const AddrMode &AM, Type *Ty, 216 unsigned AS, 217 Instruction *I) const { 218 // No global is ever allowed as a base. 219 if (AM.BaseGV) 220 return false; 221 222 // Require a 12-bit signed offset. 223 if (!isInt<12>(AM.BaseOffs)) 224 return false; 225 226 switch (AM.Scale) { 227 case 0: // "r+i" or just "i", depending on HasBaseReg. 228 break; 229 case 1: 230 if (!AM.HasBaseReg) // allow "r+i". 231 break; 232 return false; // disallow "r+r" or "r+r+i". 233 default: 234 return false; 235 } 236 237 return true; 238 } 239 240 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 241 return isInt<12>(Imm); 242 } 243 244 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 245 return isInt<12>(Imm); 246 } 247 248 // On RV32, 64-bit integers are split into their high and low parts and held 249 // in two different registers, so the trunc is free since the low register can 250 // just be used. 
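// For example, on RV32 truncating an i64 held as a {lo, hi} register pair
// down to i32 simply reads the lo register, so no instruction is needed.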
251 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 252 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 253 return false; 254 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 255 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 256 return (SrcBits == 64 && DestBits == 32); 257 } 258 259 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 260 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 261 !SrcVT.isInteger() || !DstVT.isInteger()) 262 return false; 263 unsigned SrcBits = SrcVT.getSizeInBits(); 264 unsigned DestBits = DstVT.getSizeInBits(); 265 return (SrcBits == 64 && DestBits == 32); 266 } 267 268 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 269 // Zexts are free if they can be combined with a load. 270 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 271 EVT MemVT = LD->getMemoryVT(); 272 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 273 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 274 (LD->getExtensionType() == ISD::NON_EXTLOAD || 275 LD->getExtensionType() == ISD::ZEXTLOAD)) 276 return true; 277 } 278 279 return TargetLowering::isZExtFree(Val, VT2); 280 } 281 282 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 283 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 284 } 285 286 // Changes the condition code and swaps operands if necessary, so the SetCC 287 // operation matches one of the comparisons supported directly in the RISC-V 288 // ISA. 289 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 290 switch (CC) { 291 default: 292 break; 293 case ISD::SETGT: 294 case ISD::SETLE: 295 case ISD::SETUGT: 296 case ISD::SETULE: 297 CC = ISD::getSetCCSwappedOperands(CC); 298 std::swap(LHS, RHS); 299 break; 300 } 301 } 302 303 // Return the RISC-V branch opcode that matches the given DAG integer 304 // condition code. The CondCode must be one of those supported by the RISC-V 305 // ISA (see normaliseSetCC). 
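// For example (sketching how the two helpers work together): a SETGT
// comparison is first rewritten by normaliseSetCC into SETLT with swapped
// operands, and that SETLT then maps to RISCV::BLT below, which is why only
// the six condition codes listed here need branch opcodes.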
306 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 307 switch (CC) { 308 default: 309 llvm_unreachable("Unsupported CondCode"); 310 case ISD::SETEQ: 311 return RISCV::BEQ; 312 case ISD::SETNE: 313 return RISCV::BNE; 314 case ISD::SETLT: 315 return RISCV::BLT; 316 case ISD::SETGE: 317 return RISCV::BGE; 318 case ISD::SETULT: 319 return RISCV::BLTU; 320 case ISD::SETUGE: 321 return RISCV::BGEU; 322 } 323 } 324 325 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 326 SelectionDAG &DAG) const { 327 switch (Op.getOpcode()) { 328 default: 329 report_fatal_error("unimplemented operand"); 330 case ISD::GlobalAddress: 331 return lowerGlobalAddress(Op, DAG); 332 case ISD::BlockAddress: 333 return lowerBlockAddress(Op, DAG); 334 case ISD::ConstantPool: 335 return lowerConstantPool(Op, DAG); 336 case ISD::SELECT: 337 return lowerSELECT(Op, DAG); 338 case ISD::VASTART: 339 return lowerVASTART(Op, DAG); 340 case ISD::FRAMEADDR: 341 return lowerFRAMEADDR(Op, DAG); 342 case ISD::RETURNADDR: 343 return lowerRETURNADDR(Op, DAG); 344 case ISD::BITCAST: { 345 assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && 346 "Unexpected custom legalisation"); 347 SDLoc DL(Op); 348 SDValue Op0 = Op.getOperand(0); 349 if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) 350 return SDValue(); 351 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 352 SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 353 return FPConv; 354 } 355 } 356 } 357 358 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 359 SelectionDAG &DAG) const { 360 SDLoc DL(Op); 361 EVT Ty = Op.getValueType(); 362 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 363 const GlobalValue *GV = N->getGlobal(); 364 int64_t Offset = N->getOffset(); 365 MVT XLenVT = Subtarget.getXLenVT(); 366 367 if (isPositionIndependent()) 368 report_fatal_error("Unable to lowerGlobalAddress"); 369 // In order to maximise the opportunity for common subexpression elimination, 370 // emit a separate ADD node for the global address offset instead of folding 371 // it in the global address node. Later peephole optimisations may choose to 372 // fold it back in when profitable. 
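  // For illustration (assuming the non-PIC lowering implemented here): a
  // reference to global @g becomes
  //   lui  rd, %hi(g)
  //   addi rd, rd, %lo(g)
  // with any constant offset applied by the separate ADD node mentioned above
  // so that it remains visible to CSE.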
373 SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); 374 SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); 375 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); 376 SDValue MNLo = 377 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); 378 if (Offset != 0) 379 return DAG.getNode(ISD::ADD, DL, Ty, MNLo, 380 DAG.getConstant(Offset, DL, XLenVT)); 381 return MNLo; 382 } 383 384 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 385 SelectionDAG &DAG) const { 386 SDLoc DL(Op); 387 EVT Ty = Op.getValueType(); 388 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 389 const BlockAddress *BA = N->getBlockAddress(); 390 int64_t Offset = N->getOffset(); 391 392 if (isPositionIndependent()) 393 report_fatal_error("Unable to lowerBlockAddress"); 394 395 SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); 396 SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); 397 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); 398 SDValue MNLo = 399 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); 400 return MNLo; 401 } 402 403 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 404 SelectionDAG &DAG) const { 405 SDLoc DL(Op); 406 EVT Ty = Op.getValueType(); 407 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 408 const Constant *CPA = N->getConstVal(); 409 int64_t Offset = N->getOffset(); 410 unsigned Alignment = N->getAlignment(); 411 412 if (!isPositionIndependent()) { 413 SDValue CPAHi = 414 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); 415 SDValue CPALo = 416 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); 417 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); 418 SDValue MNLo = 419 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); 420 return MNLo; 421 } else { 422 report_fatal_error("Unable to lowerConstantPool"); 423 } 424 } 425 426 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 427 SDValue CondV = Op.getOperand(0); 428 SDValue TrueV = Op.getOperand(1); 429 SDValue FalseV = Op.getOperand(2); 430 SDLoc DL(Op); 431 MVT XLenVT = Subtarget.getXLenVT(); 432 433 // If the result type is XLenVT and CondV is the output of a SETCC node 434 // which also operated on XLenVT inputs, then merge the SETCC node into the 435 // lowered RISCVISD::SELECT_CC to take advantage of the integer 436 // compare+branch instructions. 
i.e.: 437 // (select (setcc lhs, rhs, cc), truev, falsev) 438 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 439 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 440 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 441 SDValue LHS = CondV.getOperand(0); 442 SDValue RHS = CondV.getOperand(1); 443 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 444 ISD::CondCode CCVal = CC->get(); 445 446 normaliseSetCC(LHS, RHS, CCVal); 447 448 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 449 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 450 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 451 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 452 } 453 454 // Otherwise: 455 // (select condv, truev, falsev) 456 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 457 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 458 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 459 460 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 461 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 462 463 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 464 } 465 466 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 467 MachineFunction &MF = DAG.getMachineFunction(); 468 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 469 470 SDLoc DL(Op); 471 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 472 getPointerTy(MF.getDataLayout())); 473 474 // vastart just stores the address of the VarArgsFrameIndex slot into the 475 // memory location argument. 476 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 477 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 478 MachinePointerInfo(SV)); 479 } 480 481 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 482 SelectionDAG &DAG) const { 483 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 484 MachineFunction &MF = DAG.getMachineFunction(); 485 MachineFrameInfo &MFI = MF.getFrameInfo(); 486 MFI.setFrameAddressIsTaken(true); 487 unsigned FrameReg = RI.getFrameRegister(MF); 488 int XLenInBytes = Subtarget.getXLen() / 8; 489 490 EVT VT = Op.getValueType(); 491 SDLoc DL(Op); 492 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 493 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 494 while (Depth--) { 495 int Offset = -(XLenInBytes * 2); 496 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 497 DAG.getIntPtrConstant(Offset, DL)); 498 FrameAddr = 499 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 500 } 501 return FrameAddr; 502 } 503 504 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 505 SelectionDAG &DAG) const { 506 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 507 MachineFunction &MF = DAG.getMachineFunction(); 508 MachineFrameInfo &MFI = MF.getFrameInfo(); 509 MFI.setReturnAddressIsTaken(true); 510 MVT XLenVT = Subtarget.getXLenVT(); 511 int XLenInBytes = Subtarget.getXLen() / 8; 512 513 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 514 return SDValue(); 515 516 EVT VT = Op.getValueType(); 517 SDLoc DL(Op); 518 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 519 if (Depth) { 520 int Off = -XLenInBytes; 521 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 522 SDValue Offset = DAG.getConstant(Off, DL, VT); 523 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 524 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 525 MachinePointerInfo()); 
526 } 527 528 // Return the value of the return address register, marking it an implicit 529 // live-in. 530 unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 531 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 532 } 533 534 // Returns the opcode of the target-specific SDNode that implements the 32-bit 535 // form of the given Opcode. 536 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 537 switch (Opcode) { 538 default: 539 llvm_unreachable("Unexpected opcode"); 540 case ISD::SHL: 541 return RISCVISD::SLLW; 542 case ISD::SRA: 543 return RISCVISD::SRAW; 544 case ISD::SRL: 545 return RISCVISD::SRLW; 546 case ISD::SDIV: 547 return RISCVISD::DIVW; 548 case ISD::UDIV: 549 return RISCVISD::DIVUW; 550 case ISD::UREM: 551 return RISCVISD::REMUW; 552 } 553 } 554 555 // Converts the given 32-bit operation to a target-specific SelectionDAG node. 556 // Because i32 isn't a legal type for RV64, these operations would otherwise 557 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W 558 // later one because the fact the operation was originally of type i32 is 559 // lost. 560 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { 561 SDLoc DL(N); 562 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 563 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 564 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 565 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 566 // ReplaceNodeResults requires we maintain the same type for the return value. 567 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 568 } 569 570 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 571 SmallVectorImpl<SDValue> &Results, 572 SelectionDAG &DAG) const { 573 SDLoc DL(N); 574 switch (N->getOpcode()) { 575 default: 576 llvm_unreachable("Don't know how to custom type legalize this operation!"); 577 case ISD::SHL: 578 case ISD::SRA: 579 case ISD::SRL: 580 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 581 "Unexpected custom legalisation"); 582 if (N->getOperand(1).getOpcode() == ISD::Constant) 583 return; 584 Results.push_back(customLegalizeToWOp(N, DAG)); 585 break; 586 case ISD::SDIV: 587 case ISD::UDIV: 588 case ISD::UREM: 589 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 590 Subtarget.hasStdExtM() && "Unexpected custom legalisation"); 591 if (N->getOperand(0).getOpcode() == ISD::Constant || 592 N->getOperand(1).getOpcode() == ISD::Constant) 593 return; 594 Results.push_back(customLegalizeToWOp(N, DAG)); 595 break; 596 case ISD::BITCAST: { 597 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 598 Subtarget.hasStdExtF() && "Unexpected custom legalisation"); 599 SDLoc DL(N); 600 SDValue Op0 = N->getOperand(0); 601 if (Op0.getValueType() != MVT::f32) 602 return; 603 SDValue FPConv = 604 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 605 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 606 break; 607 } 608 } 609 } 610 611 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 612 DAGCombinerInfo &DCI) const { 613 SelectionDAG &DAG = DCI.DAG; 614 615 switch (N->getOpcode()) { 616 default: 617 break; 618 case RISCVISD::SplitF64: { 619 SDValue Op0 = N->getOperand(0); 620 // If the input to SplitF64 is just BuildPairF64 then the operation is 621 // redundant. Instead, use BuildPairF64's operands directly. 
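    // i.e. (sketching the fold performed below):
    //   (SplitF64 (BuildPairF64 lo, hi)) -> lo, hi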
622 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 623 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 624 625 SDLoc DL(N); 626 // This is a target-specific version of a DAGCombine performed in 627 // DAGCombiner::visitBITCAST. It performs the equivalent of: 628 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 629 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 630 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 631 !Op0.getNode()->hasOneUse()) 632 break; 633 SDValue NewSplitF64 = 634 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 635 Op0.getOperand(0)); 636 SDValue Lo = NewSplitF64.getValue(0); 637 SDValue Hi = NewSplitF64.getValue(1); 638 APInt SignBit = APInt::getSignMask(32); 639 if (Op0.getOpcode() == ISD::FNEG) { 640 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 641 DAG.getConstant(SignBit, DL, MVT::i32)); 642 return DCI.CombineTo(N, Lo, NewHi); 643 } 644 assert(Op0.getOpcode() == ISD::FABS); 645 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 646 DAG.getConstant(~SignBit, DL, MVT::i32)); 647 return DCI.CombineTo(N, Lo, NewHi); 648 } 649 case RISCVISD::SLLW: 650 case RISCVISD::SRAW: 651 case RISCVISD::SRLW: { 652 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 653 SDValue LHS = N->getOperand(0); 654 SDValue RHS = N->getOperand(1); 655 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 656 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 657 if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || 658 (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) 659 return SDValue(); 660 break; 661 } 662 case RISCVISD::FMV_X_ANYEXTW_RV64: { 663 SDLoc DL(N); 664 SDValue Op0 = N->getOperand(0); 665 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 666 // conversion is unnecessary and can be replaced with an ANY_EXTEND 667 // of the FMV_W_X_RV64 operand. 668 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 669 SDValue AExtOp = 670 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); 671 return DCI.CombineTo(N, AExtOp); 672 } 673 674 // This is a target-specific version of a DAGCombine performed in 675 // DAGCombiner::visitBITCAST. It performs the equivalent of: 676 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 677 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 678 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 679 !Op0.getNode()->hasOneUse()) 680 break; 681 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 682 Op0.getOperand(0)); 683 APInt SignBit = APInt::getSignMask(32).sext(64); 684 if (Op0.getOpcode() == ISD::FNEG) { 685 return DCI.CombineTo(N, 686 DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 687 DAG.getConstant(SignBit, DL, MVT::i64))); 688 } 689 assert(Op0.getOpcode() == ISD::FABS); 690 return DCI.CombineTo(N, 691 DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 692 DAG.getConstant(~SignBit, DL, MVT::i64))); 693 } 694 } 695 696 return SDValue(); 697 } 698 699 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 700 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 701 unsigned Depth) const { 702 switch (Op.getOpcode()) { 703 default: 704 break; 705 case RISCVISD::SLLW: 706 case RISCVISD::SRAW: 707 case RISCVISD::SRLW: 708 case RISCVISD::DIVW: 709 case RISCVISD::DIVUW: 710 case RISCVISD::REMUW: 711 // TODO: As the result is sign-extended, this is conservatively correct. 
A 712 // more precise answer could be calculated for SRAW depending on known 713 // bits in the shift amount. 714 return 33; 715 } 716 717 return 1; 718 } 719 720 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 721 MachineBasicBlock *BB) { 722 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 723 724 MachineFunction &MF = *BB->getParent(); 725 DebugLoc DL = MI.getDebugLoc(); 726 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 727 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 728 unsigned LoReg = MI.getOperand(0).getReg(); 729 unsigned HiReg = MI.getOperand(1).getReg(); 730 unsigned SrcReg = MI.getOperand(2).getReg(); 731 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 732 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 733 734 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 735 RI); 736 MachineMemOperand *MMO = 737 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 738 MachineMemOperand::MOLoad, 8, 8); 739 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 740 .addFrameIndex(FI) 741 .addImm(0) 742 .addMemOperand(MMO); 743 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 744 .addFrameIndex(FI) 745 .addImm(4) 746 .addMemOperand(MMO); 747 MI.eraseFromParent(); // The pseudo instruction is gone now. 748 return BB; 749 } 750 751 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 752 MachineBasicBlock *BB) { 753 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 754 "Unexpected instruction"); 755 756 MachineFunction &MF = *BB->getParent(); 757 DebugLoc DL = MI.getDebugLoc(); 758 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 759 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 760 unsigned DstReg = MI.getOperand(0).getReg(); 761 unsigned LoReg = MI.getOperand(1).getReg(); 762 unsigned HiReg = MI.getOperand(2).getReg(); 763 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 764 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 765 766 MachineMemOperand *MMO = 767 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 768 MachineMemOperand::MOStore, 8, 8); 769 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 770 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 771 .addFrameIndex(FI) 772 .addImm(0) 773 .addMemOperand(MMO); 774 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 775 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 776 .addFrameIndex(FI) 777 .addImm(4) 778 .addMemOperand(MMO); 779 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 780 MI.eraseFromParent(); // The pseudo instruction is gone now. 781 return BB; 782 } 783 784 MachineBasicBlock * 785 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 786 MachineBasicBlock *BB) const { 787 switch (MI.getOpcode()) { 788 default: 789 llvm_unreachable("Unexpected instr type to insert"); 790 case RISCV::Select_GPR_Using_CC_GPR: 791 case RISCV::Select_FPR32_Using_CC_GPR: 792 case RISCV::Select_FPR64_Using_CC_GPR: 793 break; 794 case RISCV::BuildPairF64Pseudo: 795 return emitBuildPairF64Pseudo(MI, BB); 796 case RISCV::SplitF64Pseudo: 797 return emitSplitF64Pseudo(MI, BB); 798 } 799 800 // To "insert" a SELECT instruction, we actually have to insert the triangle 801 // control-flow pattern. 
The incoming instruction knows the destination vreg 802 // to set, the condition code register to branch on, the true/false values to 803 // select between, and the condcode to use to select the appropriate branch. 804 // 805 // We produce the following control flow: 806 // HeadMBB 807 // | \ 808 // | IfFalseMBB 809 // | / 810 // TailMBB 811 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 812 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 813 DebugLoc DL = MI.getDebugLoc(); 814 MachineFunction::iterator I = ++BB->getIterator(); 815 816 MachineBasicBlock *HeadMBB = BB; 817 MachineFunction *F = BB->getParent(); 818 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 819 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 820 821 F->insert(I, IfFalseMBB); 822 F->insert(I, TailMBB); 823 // Move all remaining instructions to TailMBB. 824 TailMBB->splice(TailMBB->begin(), HeadMBB, 825 std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end()); 826 // Update machine-CFG edges by transferring all successors of the current 827 // block to the new block which will contain the Phi node for the select. 828 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 829 // Set the successors for HeadMBB. 830 HeadMBB->addSuccessor(IfFalseMBB); 831 HeadMBB->addSuccessor(TailMBB); 832 833 // Insert appropriate branch. 834 unsigned LHS = MI.getOperand(1).getReg(); 835 unsigned RHS = MI.getOperand(2).getReg(); 836 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 837 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 838 839 BuildMI(HeadMBB, DL, TII.get(Opcode)) 840 .addReg(LHS) 841 .addReg(RHS) 842 .addMBB(TailMBB); 843 844 // IfFalseMBB just falls through to TailMBB. 845 IfFalseMBB->addSuccessor(TailMBB); 846 847 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 848 BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), 849 MI.getOperand(0).getReg()) 850 .addReg(MI.getOperand(4).getReg()) 851 .addMBB(HeadMBB) 852 .addReg(MI.getOperand(5).getReg()) 853 .addMBB(IfFalseMBB); 854 855 MI.eraseFromParent(); // The pseudo instruction is gone now. 856 return TailMBB; 857 } 858 859 // Calling Convention Implementation. 860 // The expectations for frontend ABI lowering vary from target to target. 861 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 862 // details, but this is a longer term goal. For now, we simply try to keep the 863 // role of the frontend as simple and well-defined as possible. The rules can 864 // be summarised as: 865 // * Never split up large scalar arguments. We handle them here. 866 // * If a hardfloat calling convention is being used, and the struct may be 867 // passed in a pair of registers (fp+fp, int+fp), and both registers are 868 // available, then pass as two separate arguments. If either the GPRs or FPRs 869 // are exhausted, then pass according to the rule below. 870 // * If a struct could never be passed in registers or directly in a stack 871 // slot (as it is larger than 2*XLEN and the floating point rules don't 872 // apply), then pass it using a pointer with the byval attribute. 873 // * If a struct is less than 2*XLEN, then coerce to either a two-element 874 // word-sized array or a 2*XLEN scalar (depending on alignment). 875 // * The frontend can determine whether a struct is returned by reference or 876 // not based on its size and fields. 
If it will be returned by reference, the 877 // frontend must modify the prototype so a pointer with the sret annotation is 878 // passed as the first argument. This is not necessary for large scalar 879 // returns. 880 // * Struct return values and varargs should be coerced to structs containing 881 // register-size fields in the same situations they would be for fixed 882 // arguments. 883 884 static const MCPhysReg ArgGPRs[] = { 885 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 886 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 887 }; 888 889 // Pass a 2*XLEN argument that has been split into two XLEN values through 890 // registers or the stack as necessary. 891 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 892 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 893 MVT ValVT2, MVT LocVT2, 894 ISD::ArgFlagsTy ArgFlags2) { 895 unsigned XLenInBytes = XLen / 8; 896 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 897 // At least one half can be passed via register. 898 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 899 VA1.getLocVT(), CCValAssign::Full)); 900 } else { 901 // Both halves must be passed on the stack, with proper alignment. 902 unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); 903 State.addLoc( 904 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 905 State.AllocateStack(XLenInBytes, StackAlign), 906 VA1.getLocVT(), CCValAssign::Full)); 907 State.addLoc(CCValAssign::getMem( 908 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 909 CCValAssign::Full)); 910 return false; 911 } 912 913 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 914 // The second half can also be passed via register. 915 State.addLoc( 916 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 917 } else { 918 // The second half is passed via the stack, without additional alignment. 919 State.addLoc(CCValAssign::getMem( 920 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 921 CCValAssign::Full)); 922 } 923 924 return false; 925 } 926 927 // Implements the RISC-V calling convention. Returns true upon failure. 928 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, 929 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 930 CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { 931 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 932 assert(XLen == 32 || XLen == 64); 933 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 934 if (ValVT == MVT::f32) { 935 LocVT = XLenVT; 936 LocInfo = CCValAssign::BCvt; 937 } 938 if (XLen == 64 && ValVT == MVT::f64) { 939 LocVT = MVT::i64; 940 LocInfo = CCValAssign::BCvt; 941 } 942 943 // Any return value split in to more than two values can't be returned 944 // directly. 945 if (IsRet && ValNo > 1) 946 return true; 947 948 // If this is a variadic argument, the RISC-V calling convention requires 949 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 950 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 951 // be used regardless of whether the original argument was split during 952 // legalisation or not. The argument will not be passed by registers if the 953 // original type is larger than 2*XLEN, so the register alignment rule does 954 // not apply. 
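  // For example (assuming the standard ILP32 convention): a variadic double on
  // RV32 has 8-byte alignment, so if the next free argument register would be
  // a1, a1 is skipped and the value is passed in the aligned pair a2+a3.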
955 unsigned TwoXLenInBytes = (2 * XLen) / 8; 956 if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && 957 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 958 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 959 // Skip 'odd' register if necessary. 960 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 961 State.AllocateReg(ArgGPRs); 962 } 963 964 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 965 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 966 State.getPendingArgFlags(); 967 968 assert(PendingLocs.size() == PendingArgFlags.size() && 969 "PendingLocs and PendingArgFlags out of sync"); 970 971 // Handle passing f64 on RV32D with a soft float ABI. 972 if (XLen == 32 && ValVT == MVT::f64) { 973 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 974 "Can't lower f64 if it is split"); 975 // Depending on available argument GPRS, f64 may be passed in a pair of 976 // GPRs, split between a GPR and the stack, or passed completely on the 977 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 978 // cases. 979 unsigned Reg = State.AllocateReg(ArgGPRs); 980 LocVT = MVT::i32; 981 if (!Reg) { 982 unsigned StackOffset = State.AllocateStack(8, 8); 983 State.addLoc( 984 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 985 return false; 986 } 987 if (!State.AllocateReg(ArgGPRs)) 988 State.AllocateStack(4, 4); 989 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 990 return false; 991 } 992 993 // Split arguments might be passed indirectly, so keep track of the pending 994 // values. 995 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 996 LocVT = XLenVT; 997 LocInfo = CCValAssign::Indirect; 998 PendingLocs.push_back( 999 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 1000 PendingArgFlags.push_back(ArgFlags); 1001 if (!ArgFlags.isSplitEnd()) { 1002 return false; 1003 } 1004 } 1005 1006 // If the split argument only had two elements, it should be passed directly 1007 // in registers or on the stack. 1008 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 1009 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 1010 // Apply the normal calling convention rules to the first half of the 1011 // split argument. 1012 CCValAssign VA = PendingLocs[0]; 1013 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 1014 PendingLocs.clear(); 1015 PendingArgFlags.clear(); 1016 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 1017 ArgFlags); 1018 } 1019 1020 // Allocate to a register if possible, or else a stack slot. 1021 unsigned Reg = State.AllocateReg(ArgGPRs); 1022 unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); 1023 1024 // If we reach this point and PendingLocs is non-empty, we must be at the 1025 // end of a split argument that must be passed indirectly. 1026 if (!PendingLocs.empty()) { 1027 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 1028 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 1029 1030 for (auto &It : PendingLocs) { 1031 if (Reg) 1032 It.convertToReg(Reg); 1033 else 1034 It.convertToMem(StackOffset); 1035 State.addLoc(It); 1036 } 1037 PendingLocs.clear(); 1038 PendingArgFlags.clear(); 1039 return false; 1040 } 1041 1042 assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); 1043 1044 if (Reg) { 1045 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 1046 return false; 1047 } 1048 1049 // When an f32 or f64 is passed on the stack, no bit-conversion is needed. 
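  // e.g. an f32 that ends up on the stack has its LocVT reset to f32 with
  // LocInfo Full below, so LowerCall/LowerFormalArguments store and reload it
  // directly as a float rather than as a bit-cast integer.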
1050   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1051     LocVT = ValVT;
1052     LocInfo = CCValAssign::Full;
1053   }
1054   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1055   return false;
1056 }
1057 
1058 void RISCVTargetLowering::analyzeInputArgs(
1059     MachineFunction &MF, CCState &CCInfo,
1060     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1061   unsigned NumArgs = Ins.size();
1062   FunctionType *FType = MF.getFunction().getFunctionType();
1063 
1064   for (unsigned i = 0; i != NumArgs; ++i) {
1065     MVT ArgVT = Ins[i].VT;
1066     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1067 
1068     Type *ArgTy = nullptr;
1069     if (IsRet)
1070       ArgTy = FType->getReturnType();
1071     else if (Ins[i].isOrigArg())
1072       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1073 
1074     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1075                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
1076       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1077                         << EVT(ArgVT).getEVTString() << '\n');
1078       llvm_unreachable(nullptr);
1079     }
1080   }
1081 }
1082 
1083 void RISCVTargetLowering::analyzeOutputArgs(
1084     MachineFunction &MF, CCState &CCInfo,
1085     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1086     CallLoweringInfo *CLI) const {
1087   unsigned NumArgs = Outs.size();
1088 
1089   for (unsigned i = 0; i != NumArgs; i++) {
1090     MVT ArgVT = Outs[i].VT;
1091     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1092     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1093 
1094     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
1095                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1096       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1097                         << EVT(ArgVT).getEVTString() << "\n");
1098       llvm_unreachable(nullptr);
1099     }
1100   }
1101 }
1102 
1103 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1104 // values.
1105 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1106                                    const CCValAssign &VA, const SDLoc &DL) {
1107   switch (VA.getLocInfo()) {
1108   default:
1109     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1110   case CCValAssign::Full:
1111     break;
1112   case CCValAssign::BCvt:
1113     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1114       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1115       break;
1116     }
1117     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1118     break;
1119   }
1120   return Val;
1121 }
1122 
1123 // The caller is responsible for loading the full value if the argument is
1124 // passed with CCValAssign::Indirect.
1125 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 1126 const CCValAssign &VA, const SDLoc &DL) { 1127 MachineFunction &MF = DAG.getMachineFunction(); 1128 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1129 EVT LocVT = VA.getLocVT(); 1130 SDValue Val; 1131 1132 unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1133 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1134 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1135 1136 if (VA.getLocInfo() == CCValAssign::Indirect) 1137 return Val; 1138 1139 return convertLocVTToValVT(DAG, Val, VA, DL); 1140 } 1141 1142 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 1143 const CCValAssign &VA, const SDLoc &DL) { 1144 EVT LocVT = VA.getLocVT(); 1145 1146 switch (VA.getLocInfo()) { 1147 default: 1148 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1149 case CCValAssign::Full: 1150 break; 1151 case CCValAssign::BCvt: 1152 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 1153 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 1154 break; 1155 } 1156 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 1157 break; 1158 } 1159 return Val; 1160 } 1161 1162 // The caller is responsible for loading the full value if the argument is 1163 // passed with CCValAssign::Indirect. 1164 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 1165 const CCValAssign &VA, const SDLoc &DL) { 1166 MachineFunction &MF = DAG.getMachineFunction(); 1167 MachineFrameInfo &MFI = MF.getFrameInfo(); 1168 EVT LocVT = VA.getLocVT(); 1169 EVT ValVT = VA.getValVT(); 1170 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 1171 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1172 VA.getLocMemOffset(), /*Immutable=*/true); 1173 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1174 SDValue Val; 1175 1176 ISD::LoadExtType ExtType; 1177 switch (VA.getLocInfo()) { 1178 default: 1179 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1180 case CCValAssign::Full: 1181 case CCValAssign::Indirect: 1182 case CCValAssign::BCvt: 1183 ExtType = ISD::NON_EXTLOAD; 1184 break; 1185 } 1186 Val = DAG.getExtLoad( 1187 ExtType, DL, LocVT, Chain, FIN, 1188 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 1189 return Val; 1190 } 1191 1192 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 1193 const CCValAssign &VA, const SDLoc &DL) { 1194 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 1195 "Unexpected VA"); 1196 MachineFunction &MF = DAG.getMachineFunction(); 1197 MachineFrameInfo &MFI = MF.getFrameInfo(); 1198 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1199 1200 if (VA.isMemLoc()) { 1201 // f64 is passed on the stack. 1202 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 1203 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1204 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 1205 MachinePointerInfo::getFixedStack(MF, FI)); 1206 } 1207 1208 assert(VA.isRegLoc() && "Expected register VA assignment"); 1209 1210 unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1211 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 1212 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 1213 SDValue Hi; 1214 if (VA.getLocReg() == RISCV::X17) { 1215 // Second half of f64 is passed on the stack. 
1216     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1217     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1218     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1219                      MachinePointerInfo::getFixedStack(MF, FI));
1220   } else {
1221     // Second half of f64 is passed in another GPR.
1222     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1223     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1224     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1225   }
1226   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1227 }
1228 
1229 // Transform physical registers into virtual registers.
1230 SDValue RISCVTargetLowering::LowerFormalArguments(
1231     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1232     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1233     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1234 
1235   switch (CallConv) {
1236   default:
1237     report_fatal_error("Unsupported calling convention");
1238   case CallingConv::C:
1239   case CallingConv::Fast:
1240     break;
1241   }
1242 
1243   MachineFunction &MF = DAG.getMachineFunction();
1244 
1245   const Function &Func = MF.getFunction();
1246   if (Func.hasFnAttribute("interrupt")) {
1247     if (!Func.arg_empty())
1248       report_fatal_error(
1249           "Functions with the interrupt attribute cannot have arguments!");
1250 
1251     StringRef Kind =
1252         MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1253 
1254     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1255       report_fatal_error(
1256           "Function interrupt attribute argument not supported!");
1257   }
1258 
1259   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1260   MVT XLenVT = Subtarget.getXLenVT();
1261   unsigned XLenInBytes = Subtarget.getXLen() / 8;
1262   // Used with varargs to accumulate store chains.
1263   std::vector<SDValue> OutChains;
1264 
1265   // Assign locations to all of the incoming arguments.
1266   SmallVector<CCValAssign, 16> ArgLocs;
1267   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1268   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1269 
1270   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1271     CCValAssign &VA = ArgLocs[i];
1272     SDValue ArgValue;
1273     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1274     // case.
1275     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1276       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1277     else if (VA.isRegLoc())
1278       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1279     else
1280       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1281 
1282     if (VA.getLocInfo() == CCValAssign::Indirect) {
1283       // If the original argument was split and passed by reference (e.g. i128
1284       // on RV32), we need to load all parts of it here (using the same
1285       // address).
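      // For example (a sketch): an i128 argument on RV32 arrives as a single
      // pointer in a GPR; the four i32 pieces created by type legalisation are
      // then all loaded through that pointer, at offsets 0, 4, 8 and 12, by
      // the code below.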
1286       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1287                                    MachinePointerInfo()));
1288       unsigned ArgIndex = Ins[i].OrigArgIndex;
1289       assert(Ins[i].PartOffset == 0);
1290       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1291         CCValAssign &PartVA = ArgLocs[i + 1];
1292         unsigned PartOffset = Ins[i + 1].PartOffset;
1293         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1294                                       DAG.getIntPtrConstant(PartOffset, DL));
1295         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1296                                      MachinePointerInfo()));
1297         ++i;
1298       }
1299       continue;
1300     }
1301     InVals.push_back(ArgValue);
1302   }
1303 
1304   if (IsVarArg) {
1305     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1306     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1307     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1308     MachineFrameInfo &MFI = MF.getFrameInfo();
1309     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1310     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1311 
1312     // Offset of the first variable argument from stack pointer, and size of
1313     // the vararg save area. For now, the varargs save area is either zero or
1314     // large enough to hold a0-a7.
1315     int VaArgOffset, VarArgsSaveSize;
1316 
1317     // If all registers are allocated, then all varargs must be passed on the
1318     // stack and we don't need to save any argregs.
1319     if (ArgRegs.size() == Idx) {
1320       VaArgOffset = CCInfo.getNextStackOffset();
1321       VarArgsSaveSize = 0;
1322     } else {
1323       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1324       VaArgOffset = -VarArgsSaveSize;
1325     }
1326 
1327     // Record the frame index of the first variable argument
1328     // which is needed by VASTART.
1329     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1330     RVFI->setVarArgsFrameIndex(FI);
1331 
1332     // If saving an odd number of registers then create an extra stack slot to
1333     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
1334     // offsets to even-numbered registers remain 2*XLEN-aligned.
1335     if (Idx % 2) {
1336       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1337                                  true);
1338       VarArgsSaveSize += XLenInBytes;
1339     }
1340 
1341     // Copy the integer registers that may have been used for passing varargs
1342     // to the vararg save area.
1343     for (unsigned I = Idx; I < ArgRegs.size();
1344          ++I, VaArgOffset += XLenInBytes) {
1345       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1346       RegInfo.addLiveIn(ArgRegs[I], Reg);
1347       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1348       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1349       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1350       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1351                                    MachinePointerInfo::getFixedStack(MF, FI));
1352       cast<StoreSDNode>(Store.getNode())
1353           ->getMemOperand()
1354           ->setValue((Value *)nullptr);
1355       OutChains.push_back(Store);
1356     }
1357     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1358   }
1359 
1360   // All stores are grouped in one node to allow matching between the sizes of
1361   // Ins and InVals. This only happens for vararg functions.
1362   if (!OutChains.empty()) {
1363     OutChains.push_back(Chain);
1364     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1365   }
1366 
1367   return Chain;
1368 }
1369 
1370 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1371 /// for tail call optimization.
1372 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 1373 bool RISCVTargetLowering::IsEligibleForTailCallOptimization( 1374 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 1375 const SmallVector<CCValAssign, 16> &ArgLocs) const { 1376 1377 auto &Callee = CLI.Callee; 1378 auto CalleeCC = CLI.CallConv; 1379 auto IsVarArg = CLI.IsVarArg; 1380 auto &Outs = CLI.Outs; 1381 auto &Caller = MF.getFunction(); 1382 auto CallerCC = Caller.getCallingConv(); 1383 1384 // Do not tail call opt functions with "disable-tail-calls" attribute. 1385 if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") 1386 return false; 1387 1388 // Exception-handling functions need a special set of instructions to 1389 // indicate a return to the hardware. Tail-calling another function would 1390 // probably break this. 1391 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 1392 // should be expanded as new function attributes are introduced. 1393 if (Caller.hasFnAttribute("interrupt")) 1394 return false; 1395 1396 // Do not tail call opt functions with varargs. 1397 if (IsVarArg) 1398 return false; 1399 1400 // Do not tail call opt if the stack is used to pass parameters. 1401 if (CCInfo.getNextStackOffset() != 0) 1402 return false; 1403 1404 // Do not tail call opt if any parameters need to be passed indirectly. 1405 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 1406 // passed indirectly. So the address of the value will be passed in a 1407 // register, or if not available, then the address is put on the stack. In 1408 // order to pass indirectly, space on the stack often needs to be allocated 1409 // in order to store the value. In this case the CCInfo.getNextStackOffset() 1410 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 1411 // are passed CCValAssign::Indirect. 1412 for (auto &VA : ArgLocs) 1413 if (VA.getLocInfo() == CCValAssign::Indirect) 1414 return false; 1415 1416 // Do not tail call opt if either caller or callee uses struct return 1417 // semantics. 1418 auto IsCallerStructRet = Caller.hasStructRetAttr(); 1419 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 1420 if (IsCallerStructRet || IsCalleeStructRet) 1421 return false; 1422 1423 // Externally-defined functions with weak linkage should not be 1424 // tail-called. The behaviour of branch instructions in this situation (as 1425 // used for tail calls) is implementation-defined, so we cannot rely on the 1426 // linker replacing the tail call with a return. 1427 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1428 const GlobalValue *GV = G->getGlobal(); 1429 if (GV->hasExternalWeakLinkage()) 1430 return false; 1431 } 1432 1433 // The callee has to preserve all registers the caller needs to preserve. 1434 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1435 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 1436 if (CalleeCC != CallerCC) { 1437 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 1438 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 1439 return false; 1440 } 1441 1442 // Byval parameters hand the function a pointer directly into the stack area 1443 // we want to reuse during a tail call. Working around this *is* possible 1444 // but less efficient and uglier in LowerCall. 
1445 for (auto &Arg : Outs) 1446 if (Arg.Flags.isByVal()) 1447 return false; 1448 1449 return true; 1450 } 1451 1452 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 1453 // and output parameter nodes. 1454 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 1455 SmallVectorImpl<SDValue> &InVals) const { 1456 SelectionDAG &DAG = CLI.DAG; 1457 SDLoc &DL = CLI.DL; 1458 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1459 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1460 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1461 SDValue Chain = CLI.Chain; 1462 SDValue Callee = CLI.Callee; 1463 bool &IsTailCall = CLI.IsTailCall; 1464 CallingConv::ID CallConv = CLI.CallConv; 1465 bool IsVarArg = CLI.IsVarArg; 1466 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1467 MVT XLenVT = Subtarget.getXLenVT(); 1468 1469 MachineFunction &MF = DAG.getMachineFunction(); 1470 1471 // Analyze the operands of the call, assigning locations to each operand. 1472 SmallVector<CCValAssign, 16> ArgLocs; 1473 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1474 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 1475 1476 // Check if it's really possible to do a tail call. 1477 if (IsTailCall) 1478 IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, 1479 ArgLocs); 1480 1481 if (IsTailCall) 1482 ++NumTailCalls; 1483 else if (CLI.CS && CLI.CS.isMustTailCall()) 1484 report_fatal_error("failed to perform tail call elimination on a call " 1485 "site marked musttail"); 1486 1487 // Get a count of how many bytes are to be pushed on the stack. 1488 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 1489 1490 // Create local copies for byval args 1491 SmallVector<SDValue, 8> ByValArgs; 1492 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 1493 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1494 if (!Flags.isByVal()) 1495 continue; 1496 1497 SDValue Arg = OutVals[i]; 1498 unsigned Size = Flags.getByValSize(); 1499 unsigned Align = Flags.getByValAlign(); 1500 1501 int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false); 1502 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1503 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 1504 1505 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, 1506 /*IsVolatile=*/false, 1507 /*AlwaysInline=*/false, 1508 IsTailCall, MachinePointerInfo(), 1509 MachinePointerInfo()); 1510 ByValArgs.push_back(FIPtr); 1511 } 1512 1513 if (!IsTailCall) 1514 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 1515 1516 // Copy argument values to their designated locations. 1517 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 1518 SmallVector<SDValue, 8> MemOpChains; 1519 SDValue StackPtr; 1520 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 1521 CCValAssign &VA = ArgLocs[i]; 1522 SDValue ArgValue = OutVals[i]; 1523 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1524 1525 // Handle passing f64 on RV32D with a soft float ABI as a special case. 
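    // For illustration (assuming the ILP32 soft-float convention handled
    // here): an f64 argument whose low half is assigned to a2 is split by
    // RISCVISD::SplitF64 into two i32 halves, with the low half passed in a2
    // and the high half in a3; if the low half lands in a7 there is no paired
    // register left, so the high half is stored to the stack instead.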
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      unsigned RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
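  // Gluing the copies to each other and to the call keeps them adjacent in
  // the final schedule, so nothing can be inserted that clobbers the argument
  // registers between the copies and the call itself.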
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so the direct call can then be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
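  // An f64 returned on RV32 with a soft-float ABI comes back in the a0/a1
  // register pair and is reassembled below with RISCVISD::BuildPairF64.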
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  case RISCVISD::SLLW:
    return "RISCVISD::SLLW";
  case RISCVISD::SRAW:
    return "RISCVISD::SRAW";
  case RISCVISD::SRLW:
    return "RISCVISD::SRLW";
  case RISCVISD::DIVW:
    return "RISCVISD::DIVW";
  case RISCVISD::DIVUW:
    return "RISCVISD::DIVUW";
  case RISCVISD::REMUW:
    return "RISCVISD::REMUW";
  case RISCVISD::FMV_W_X_RV64:
    return "RISCVISD::FMV_W_X_RV64";
  case RISCVISD::FMV_X_ANYEXTW_RV64:
    return "RISCVISD::FMV_X_ANYEXTW_RV64";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
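  // Currently only the 'r' constraint (any general-purpose register) is
  // recognized here; everything else is deferred to the generic
  // TargetLowering handling.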
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating-point operations can't be used in an LR/SC sequence without
  // breaking the forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt,
    AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend. For example, an i8 field at byte offset 1 within the aligned
  // word on RV32 has ShiftAmt = 8 and ValWidth = 8, giving
  // SextShamt = 32 - 8 - 8 = 16.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}