1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/ValueTypes.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/DiagnosticPrinter.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "riscv-lower" 38 39 STATISTIC(NumTailCalls, "Number of tail calls"); 40 41 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 42 const RISCVSubtarget &STI) 43 : TargetLowering(TM), Subtarget(STI) { 44 45 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 46 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 47 48 if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64) 49 report_fatal_error("Don't know how to lower this ABI"); 50 51 MVT XLenVT = Subtarget.getXLenVT(); 52 53 // Set up the register classes. 54 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 55 56 if (Subtarget.hasStdExtF()) 57 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 58 if (Subtarget.hasStdExtD()) 59 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 60 61 // Compute derived properties from the register classes. 62 computeRegisterProperties(STI.getRegisterInfo()); 63 64 setStackPointerRegisterToSaveRestore(RISCV::X2); 65 66 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 67 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 68 69 // TODO: add all necessary setOperationAction calls. 
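  // A brief reminder (not from the original source) of the legalisation
  // actions used below: Expand lets the generic legalizer rewrite a node in
  // terms of other operations or a libcall, Promote widens it to a larger
  // type, and Custom routes it to LowerOperation/ReplaceNodeResults in this
  // file.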
70 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 71 72 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 73 setOperationAction(ISD::BR_CC, XLenVT, Expand); 74 setOperationAction(ISD::SELECT, XLenVT, Custom); 75 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 76 77 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 78 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 79 80 setOperationAction(ISD::VASTART, MVT::Other, Custom); 81 setOperationAction(ISD::VAARG, MVT::Other, Expand); 82 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 83 setOperationAction(ISD::VAEND, MVT::Other, Expand); 84 85 for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) 86 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 87 88 if (Subtarget.is64Bit()) { 89 setOperationAction(ISD::SHL, MVT::i32, Custom); 90 setOperationAction(ISD::SRA, MVT::i32, Custom); 91 setOperationAction(ISD::SRL, MVT::i32, Custom); 92 } 93 94 if (!Subtarget.hasStdExtM()) { 95 setOperationAction(ISD::MUL, XLenVT, Expand); 96 setOperationAction(ISD::MULHS, XLenVT, Expand); 97 setOperationAction(ISD::MULHU, XLenVT, Expand); 98 setOperationAction(ISD::SDIV, XLenVT, Expand); 99 setOperationAction(ISD::UDIV, XLenVT, Expand); 100 setOperationAction(ISD::SREM, XLenVT, Expand); 101 setOperationAction(ISD::UREM, XLenVT, Expand); 102 } 103 104 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 105 setOperationAction(ISD::SDIV, MVT::i32, Custom); 106 setOperationAction(ISD::UDIV, MVT::i32, Custom); 107 setOperationAction(ISD::UREM, MVT::i32, Custom); 108 } 109 110 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 111 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 112 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 113 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 114 115 setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); 116 setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); 117 setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); 118 119 setOperationAction(ISD::ROTL, XLenVT, Expand); 120 setOperationAction(ISD::ROTR, XLenVT, Expand); 121 setOperationAction(ISD::BSWAP, XLenVT, Expand); 122 setOperationAction(ISD::CTTZ, XLenVT, Expand); 123 setOperationAction(ISD::CTLZ, XLenVT, Expand); 124 setOperationAction(ISD::CTPOP, XLenVT, Expand); 125 126 ISD::CondCode FPCCToExtend[] = { 127 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, 128 ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, 129 ISD::SETGT, ISD::SETGE, ISD::SETNE}; 130 131 ISD::NodeType FPOpToExtend[] = { 132 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; 133 134 if (Subtarget.hasStdExtF()) { 135 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 136 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 137 for (auto CC : FPCCToExtend) 138 setCondCodeAction(CC, MVT::f32, Expand); 139 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 140 setOperationAction(ISD::SELECT, MVT::f32, Custom); 141 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 142 for (auto Op : FPOpToExtend) 143 setOperationAction(Op, MVT::f32, Expand); 144 } 145 146 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 147 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 148 149 if (Subtarget.hasStdExtD()) { 150 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 151 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 152 for (auto CC : FPCCToExtend) 153 setCondCodeAction(CC, MVT::f64, Expand); 154 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 155 setOperationAction(ISD::SELECT, MVT::f64, Custom); 156 setOperationAction(ISD::BR_CC, 
MVT::f64, Expand); 157 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 158 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 159 for (auto Op : FPOpToExtend) 160 setOperationAction(Op, MVT::f64, Expand); 161 } 162 163 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 164 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 165 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 166 167 if (Subtarget.hasStdExtA()) { 168 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 169 setMinCmpXchgSizeInBits(32); 170 } else { 171 setMaxAtomicSizeInBitsSupported(0); 172 } 173 174 setBooleanContents(ZeroOrOneBooleanContent); 175 176 // Function alignments (log2). 177 unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; 178 setMinFunctionAlignment(FunctionAlignment); 179 setPrefFunctionAlignment(FunctionAlignment); 180 181 // Effectively disable jump table generation. 182 setMinimumJumpTableEntries(INT_MAX); 183 } 184 185 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 186 EVT VT) const { 187 if (!VT.isVector()) 188 return getPointerTy(DL); 189 return VT.changeVectorElementTypeToInteger(); 190 } 191 192 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 193 const CallInst &I, 194 MachineFunction &MF, 195 unsigned Intrinsic) const { 196 switch (Intrinsic) { 197 default: 198 return false; 199 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 200 case Intrinsic::riscv_masked_atomicrmw_add_i32: 201 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 202 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 203 case Intrinsic::riscv_masked_atomicrmw_max_i32: 204 case Intrinsic::riscv_masked_atomicrmw_min_i32: 205 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 206 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 207 case Intrinsic::riscv_masked_cmpxchg_i32: 208 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 209 Info.opc = ISD::INTRINSIC_W_CHAIN; 210 Info.memVT = MVT::getVT(PtrTy->getElementType()); 211 Info.ptrVal = I.getArgOperand(0); 212 Info.offset = 0; 213 Info.align = 4; 214 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 215 MachineMemOperand::MOVolatile; 216 return true; 217 } 218 } 219 220 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 221 const AddrMode &AM, Type *Ty, 222 unsigned AS, 223 Instruction *I) const { 224 // No global is ever allowed as a base. 225 if (AM.BaseGV) 226 return false; 227 228 // Require a 12-bit signed offset. 229 if (!isInt<12>(AM.BaseOffs)) 230 return false; 231 232 switch (AM.Scale) { 233 case 0: // "r+i" or just "i", depending on HasBaseReg. 234 break; 235 case 1: 236 if (!AM.HasBaseReg) // allow "r+i". 237 break; 238 return false; // disallow "r+r" or "r+r+i". 239 default: 240 return false; 241 } 242 243 return true; 244 } 245 246 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 247 return isInt<12>(Imm); 248 } 249 250 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 251 return isInt<12>(Imm); 252 } 253 254 // On RV32, 64-bit integers are split into their high and low parts and held 255 // in two different registers, so the trunc is free since the low register can 256 // just be used. 
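// For example, on RV32 a (trunc i64 %x to i32) simply uses the register
// holding the low half of %x; no instruction is needed.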
257 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 258 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 259 return false; 260 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 261 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 262 return (SrcBits == 64 && DestBits == 32); 263 } 264 265 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 266 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 267 !SrcVT.isInteger() || !DstVT.isInteger()) 268 return false; 269 unsigned SrcBits = SrcVT.getSizeInBits(); 270 unsigned DestBits = DstVT.getSizeInBits(); 271 return (SrcBits == 64 && DestBits == 32); 272 } 273 274 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 275 // Zexts are free if they can be combined with a load. 276 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 277 EVT MemVT = LD->getMemoryVT(); 278 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 279 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 280 (LD->getExtensionType() == ISD::NON_EXTLOAD || 281 LD->getExtensionType() == ISD::ZEXTLOAD)) 282 return true; 283 } 284 285 return TargetLowering::isZExtFree(Val, VT2); 286 } 287 288 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 289 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 290 } 291 292 // Changes the condition code and swaps operands if necessary, so the SetCC 293 // operation matches one of the comparisons supported directly in the RISC-V 294 // ISA. 295 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 296 switch (CC) { 297 default: 298 break; 299 case ISD::SETGT: 300 case ISD::SETLE: 301 case ISD::SETUGT: 302 case ISD::SETULE: 303 CC = ISD::getSetCCSwappedOperands(CC); 304 std::swap(LHS, RHS); 305 break; 306 } 307 } 308 309 // Return the RISC-V branch opcode that matches the given DAG integer 310 // condition code. The CondCode must be one of those supported by the RISC-V 311 // ISA (see normaliseSetCC). 
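// Illustrative example: normaliseSetCC turns (setcc a, b, setgt) into
// (setcc b, a, setlt), which maps onto BLT below; only EQ/NE/LT/GE/LTU/GEU
// have native branch instructions.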
312 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 313 switch (CC) { 314 default: 315 llvm_unreachable("Unsupported CondCode"); 316 case ISD::SETEQ: 317 return RISCV::BEQ; 318 case ISD::SETNE: 319 return RISCV::BNE; 320 case ISD::SETLT: 321 return RISCV::BLT; 322 case ISD::SETGE: 323 return RISCV::BGE; 324 case ISD::SETULT: 325 return RISCV::BLTU; 326 case ISD::SETUGE: 327 return RISCV::BGEU; 328 } 329 } 330 331 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 332 SelectionDAG &DAG) const { 333 switch (Op.getOpcode()) { 334 default: 335 report_fatal_error("unimplemented operand"); 336 case ISD::GlobalAddress: 337 return lowerGlobalAddress(Op, DAG); 338 case ISD::BlockAddress: 339 return lowerBlockAddress(Op, DAG); 340 case ISD::ConstantPool: 341 return lowerConstantPool(Op, DAG); 342 case ISD::SELECT: 343 return lowerSELECT(Op, DAG); 344 case ISD::VASTART: 345 return lowerVASTART(Op, DAG); 346 case ISD::FRAMEADDR: 347 return lowerFRAMEADDR(Op, DAG); 348 case ISD::RETURNADDR: 349 return lowerRETURNADDR(Op, DAG); 350 case ISD::BITCAST: { 351 assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && 352 "Unexpected custom legalisation"); 353 SDLoc DL(Op); 354 SDValue Op0 = Op.getOperand(0); 355 if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) 356 return SDValue(); 357 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 358 SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 359 return FPConv; 360 } 361 } 362 } 363 364 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 365 SelectionDAG &DAG) const { 366 SDLoc DL(Op); 367 EVT Ty = Op.getValueType(); 368 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 369 const GlobalValue *GV = N->getGlobal(); 370 int64_t Offset = N->getOffset(); 371 MVT XLenVT = Subtarget.getXLenVT(); 372 373 if (isPositionIndependent()) 374 report_fatal_error("Unable to lowerGlobalAddress"); 375 // In order to maximise the opportunity for common subexpression elimination, 376 // emit a separate ADD node for the global address offset instead of folding 377 // it in the global address node. Later peephole optimisations may choose to 378 // fold it back in when profitable. 
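  // Illustrative result for a non-PIC global G with offset 8 (assuming the
  // usual %hi/%lo materialisation): lui rX, %hi(G); addi rX, rX, %lo(G);
  // addi rY, rX, 8.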
379 SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); 380 SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); 381 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); 382 SDValue MNLo = 383 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); 384 if (Offset != 0) 385 return DAG.getNode(ISD::ADD, DL, Ty, MNLo, 386 DAG.getConstant(Offset, DL, XLenVT)); 387 return MNLo; 388 } 389 390 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 391 SelectionDAG &DAG) const { 392 SDLoc DL(Op); 393 EVT Ty = Op.getValueType(); 394 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 395 const BlockAddress *BA = N->getBlockAddress(); 396 int64_t Offset = N->getOffset(); 397 398 if (isPositionIndependent()) 399 report_fatal_error("Unable to lowerBlockAddress"); 400 401 SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); 402 SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); 403 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); 404 SDValue MNLo = 405 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); 406 return MNLo; 407 } 408 409 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 410 SelectionDAG &DAG) const { 411 SDLoc DL(Op); 412 EVT Ty = Op.getValueType(); 413 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 414 const Constant *CPA = N->getConstVal(); 415 int64_t Offset = N->getOffset(); 416 unsigned Alignment = N->getAlignment(); 417 418 if (!isPositionIndependent()) { 419 SDValue CPAHi = 420 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); 421 SDValue CPALo = 422 DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); 423 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); 424 SDValue MNLo = 425 SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); 426 return MNLo; 427 } else { 428 report_fatal_error("Unable to lowerConstantPool"); 429 } 430 } 431 432 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 433 SDValue CondV = Op.getOperand(0); 434 SDValue TrueV = Op.getOperand(1); 435 SDValue FalseV = Op.getOperand(2); 436 SDLoc DL(Op); 437 MVT XLenVT = Subtarget.getXLenVT(); 438 439 // If the result type is XLenVT and CondV is the output of a SETCC node 440 // which also operated on XLenVT inputs, then merge the SETCC node into the 441 // lowered RISCVISD::SELECT_CC to take advantage of the integer 442 // compare+branch instructions. 
i.e.: 443 // (select (setcc lhs, rhs, cc), truev, falsev) 444 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 445 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 446 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 447 SDValue LHS = CondV.getOperand(0); 448 SDValue RHS = CondV.getOperand(1); 449 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 450 ISD::CondCode CCVal = CC->get(); 451 452 normaliseSetCC(LHS, RHS, CCVal); 453 454 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 455 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 456 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 457 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 458 } 459 460 // Otherwise: 461 // (select condv, truev, falsev) 462 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 463 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 464 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 465 466 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 467 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 468 469 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 470 } 471 472 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 473 MachineFunction &MF = DAG.getMachineFunction(); 474 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 475 476 SDLoc DL(Op); 477 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 478 getPointerTy(MF.getDataLayout())); 479 480 // vastart just stores the address of the VarArgsFrameIndex slot into the 481 // memory location argument. 482 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 483 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 484 MachinePointerInfo(SV)); 485 } 486 487 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 488 SelectionDAG &DAG) const { 489 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 490 MachineFunction &MF = DAG.getMachineFunction(); 491 MachineFrameInfo &MFI = MF.getFrameInfo(); 492 MFI.setFrameAddressIsTaken(true); 493 unsigned FrameReg = RI.getFrameRegister(MF); 494 int XLenInBytes = Subtarget.getXLen() / 8; 495 496 EVT VT = Op.getValueType(); 497 SDLoc DL(Op); 498 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 499 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 500 while (Depth--) { 501 int Offset = -(XLenInBytes * 2); 502 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 503 DAG.getIntPtrConstant(Offset, DL)); 504 FrameAddr = 505 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 506 } 507 return FrameAddr; 508 } 509 510 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 511 SelectionDAG &DAG) const { 512 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 513 MachineFunction &MF = DAG.getMachineFunction(); 514 MachineFrameInfo &MFI = MF.getFrameInfo(); 515 MFI.setReturnAddressIsTaken(true); 516 MVT XLenVT = Subtarget.getXLenVT(); 517 int XLenInBytes = Subtarget.getXLen() / 8; 518 519 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 520 return SDValue(); 521 522 EVT VT = Op.getValueType(); 523 SDLoc DL(Op); 524 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 525 if (Depth) { 526 int Off = -XLenInBytes; 527 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 528 SDValue Offset = DAG.getConstant(Off, DL, VT); 529 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 530 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 531 MachinePointerInfo()); 
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on because the fact the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  }
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
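    // e.g. SplitF64 (BuildPairF64 lo, hi) can be folded to just {lo, hi}.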
628 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 629 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 630 631 SDLoc DL(N); 632 // This is a target-specific version of a DAGCombine performed in 633 // DAGCombiner::visitBITCAST. It performs the equivalent of: 634 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 635 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 636 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 637 !Op0.getNode()->hasOneUse()) 638 break; 639 SDValue NewSplitF64 = 640 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 641 Op0.getOperand(0)); 642 SDValue Lo = NewSplitF64.getValue(0); 643 SDValue Hi = NewSplitF64.getValue(1); 644 APInt SignBit = APInt::getSignMask(32); 645 if (Op0.getOpcode() == ISD::FNEG) { 646 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 647 DAG.getConstant(SignBit, DL, MVT::i32)); 648 return DCI.CombineTo(N, Lo, NewHi); 649 } 650 assert(Op0.getOpcode() == ISD::FABS); 651 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 652 DAG.getConstant(~SignBit, DL, MVT::i32)); 653 return DCI.CombineTo(N, Lo, NewHi); 654 } 655 case RISCVISD::SLLW: 656 case RISCVISD::SRAW: 657 case RISCVISD::SRLW: { 658 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 659 SDValue LHS = N->getOperand(0); 660 SDValue RHS = N->getOperand(1); 661 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 662 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); 663 if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || 664 (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) 665 return SDValue(); 666 break; 667 } 668 case RISCVISD::FMV_X_ANYEXTW_RV64: { 669 SDLoc DL(N); 670 SDValue Op0 = N->getOperand(0); 671 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 672 // conversion is unnecessary and can be replaced with an ANY_EXTEND 673 // of the FMV_W_X_RV64 operand. 674 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { 675 SDValue AExtOp = 676 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); 677 return DCI.CombineTo(N, AExtOp); 678 } 679 680 // This is a target-specific version of a DAGCombine performed in 681 // DAGCombiner::visitBITCAST. It performs the equivalent of: 682 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 683 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 684 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 685 !Op0.getNode()->hasOneUse()) 686 break; 687 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 688 Op0.getOperand(0)); 689 APInt SignBit = APInt::getSignMask(32).sext(64); 690 if (Op0.getOpcode() == ISD::FNEG) { 691 return DCI.CombineTo(N, 692 DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 693 DAG.getConstant(SignBit, DL, MVT::i64))); 694 } 695 assert(Op0.getOpcode() == ISD::FABS); 696 return DCI.CombineTo(N, 697 DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 698 DAG.getConstant(~SignBit, DL, MVT::i64))); 699 } 700 } 701 702 return SDValue(); 703 } 704 705 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 706 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 707 unsigned Depth) const { 708 switch (Op.getOpcode()) { 709 default: 710 break; 711 case RISCVISD::SLLW: 712 case RISCVISD::SRAW: 713 case RISCVISD::SRLW: 714 case RISCVISD::DIVW: 715 case RISCVISD::DIVUW: 716 case RISCVISD::REMUW: 717 // TODO: As the result is sign-extended, this is conservatively correct. 
A 718 // more precise answer could be calculated for SRAW depending on known 719 // bits in the shift amount. 720 return 33; 721 } 722 723 return 1; 724 } 725 726 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 727 MachineBasicBlock *BB) { 728 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 729 730 MachineFunction &MF = *BB->getParent(); 731 DebugLoc DL = MI.getDebugLoc(); 732 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 733 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 734 unsigned LoReg = MI.getOperand(0).getReg(); 735 unsigned HiReg = MI.getOperand(1).getReg(); 736 unsigned SrcReg = MI.getOperand(2).getReg(); 737 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 738 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 739 740 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 741 RI); 742 MachineMemOperand *MMO = 743 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 744 MachineMemOperand::MOLoad, 8, 8); 745 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 746 .addFrameIndex(FI) 747 .addImm(0) 748 .addMemOperand(MMO); 749 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 750 .addFrameIndex(FI) 751 .addImm(4) 752 .addMemOperand(MMO); 753 MI.eraseFromParent(); // The pseudo instruction is gone now. 754 return BB; 755 } 756 757 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 758 MachineBasicBlock *BB) { 759 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 760 "Unexpected instruction"); 761 762 MachineFunction &MF = *BB->getParent(); 763 DebugLoc DL = MI.getDebugLoc(); 764 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 765 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 766 unsigned DstReg = MI.getOperand(0).getReg(); 767 unsigned LoReg = MI.getOperand(1).getReg(); 768 unsigned HiReg = MI.getOperand(2).getReg(); 769 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 770 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(); 771 772 MachineMemOperand *MMO = 773 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 774 MachineMemOperand::MOStore, 8, 8); 775 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 776 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 777 .addFrameIndex(FI) 778 .addImm(0) 779 .addMemOperand(MMO); 780 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 781 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 782 .addFrameIndex(FI) 783 .addImm(4) 784 .addMemOperand(MMO); 785 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 786 MI.eraseFromParent(); // The pseudo instruction is gone now. 787 return BB; 788 } 789 790 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 791 MachineBasicBlock *BB) { 792 // To "insert" a SELECT instruction, we actually have to insert the triangle 793 // control-flow pattern. The incoming instruction knows the destination vreg 794 // to set, the condition code register to branch on, the true/false values to 795 // select between, and the condcode to use to select the appropriate branch. 
796 // 797 // We produce the following control flow: 798 // HeadMBB 799 // | \ 800 // | IfFalseMBB 801 // | / 802 // TailMBB 803 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 804 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 805 DebugLoc DL = MI.getDebugLoc(); 806 MachineFunction::iterator I = ++BB->getIterator(); 807 808 MachineBasicBlock *HeadMBB = BB; 809 MachineFunction *F = BB->getParent(); 810 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 811 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 812 813 F->insert(I, IfFalseMBB); 814 F->insert(I, TailMBB); 815 // Move all remaining instructions to TailMBB. 816 TailMBB->splice(TailMBB->begin(), HeadMBB, std::next(MI.getIterator()), 817 HeadMBB->end()); 818 // Update machine-CFG edges by transferring all successors of the current 819 // block to the new block which will contain the Phi node for the select. 820 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 821 // Set the successors for HeadMBB. 822 HeadMBB->addSuccessor(IfFalseMBB); 823 HeadMBB->addSuccessor(TailMBB); 824 825 // Insert appropriate branch. 826 unsigned LHS = MI.getOperand(1).getReg(); 827 unsigned RHS = MI.getOperand(2).getReg(); 828 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 829 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 830 831 BuildMI(HeadMBB, DL, TII.get(Opcode)) 832 .addReg(LHS) 833 .addReg(RHS) 834 .addMBB(TailMBB); 835 836 // IfFalseMBB just falls through to TailMBB. 837 IfFalseMBB->addSuccessor(TailMBB); 838 839 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 840 BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), 841 MI.getOperand(0).getReg()) 842 .addReg(MI.getOperand(4).getReg()) 843 .addMBB(HeadMBB) 844 .addReg(MI.getOperand(5).getReg()) 845 .addMBB(IfFalseMBB); 846 847 MI.eraseFromParent(); // The pseudo instruction is gone now. 848 return TailMBB; 849 } 850 851 MachineBasicBlock * 852 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 853 MachineBasicBlock *BB) const { 854 switch (MI.getOpcode()) { 855 default: 856 llvm_unreachable("Unexpected instr type to insert"); 857 case RISCV::Select_GPR_Using_CC_GPR: 858 case RISCV::Select_FPR32_Using_CC_GPR: 859 case RISCV::Select_FPR64_Using_CC_GPR: 860 return emitSelectPseudo(MI, BB); 861 case RISCV::BuildPairF64Pseudo: 862 return emitBuildPairF64Pseudo(MI, BB); 863 case RISCV::SplitF64Pseudo: 864 return emitSplitF64Pseudo(MI, BB); 865 } 866 } 867 868 // Calling Convention Implementation. 869 // The expectations for frontend ABI lowering vary from target to target. 870 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 871 // details, but this is a longer term goal. For now, we simply try to keep the 872 // role of the frontend as simple and well-defined as possible. The rules can 873 // be summarised as: 874 // * Never split up large scalar arguments. We handle them here. 875 // * If a hardfloat calling convention is being used, and the struct may be 876 // passed in a pair of registers (fp+fp, int+fp), and both registers are 877 // available, then pass as two separate arguments. If either the GPRs or FPRs 878 // are exhausted, then pass according to the rule below. 879 // * If a struct could never be passed in registers or directly in a stack 880 // slot (as it is larger than 2*XLEN and the floating point rules don't 881 // apply), then pass it using a pointer with the byval attribute. 
882 // * If a struct is less than 2*XLEN, then coerce to either a two-element 883 // word-sized array or a 2*XLEN scalar (depending on alignment). 884 // * The frontend can determine whether a struct is returned by reference or 885 // not based on its size and fields. If it will be returned by reference, the 886 // frontend must modify the prototype so a pointer with the sret annotation is 887 // passed as the first argument. This is not necessary for large scalar 888 // returns. 889 // * Struct return values and varargs should be coerced to structs containing 890 // register-size fields in the same situations they would be for fixed 891 // arguments. 892 893 static const MCPhysReg ArgGPRs[] = { 894 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 895 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 896 }; 897 898 // Pass a 2*XLEN argument that has been split into two XLEN values through 899 // registers or the stack as necessary. 900 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 901 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 902 MVT ValVT2, MVT LocVT2, 903 ISD::ArgFlagsTy ArgFlags2) { 904 unsigned XLenInBytes = XLen / 8; 905 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 906 // At least one half can be passed via register. 907 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 908 VA1.getLocVT(), CCValAssign::Full)); 909 } else { 910 // Both halves must be passed on the stack, with proper alignment. 911 unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); 912 State.addLoc( 913 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 914 State.AllocateStack(XLenInBytes, StackAlign), 915 VA1.getLocVT(), CCValAssign::Full)); 916 State.addLoc(CCValAssign::getMem( 917 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 918 CCValAssign::Full)); 919 return false; 920 } 921 922 if (unsigned Reg = State.AllocateReg(ArgGPRs)) { 923 // The second half can also be passed via register. 924 State.addLoc( 925 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 926 } else { 927 // The second half is passed via the stack, without additional alignment. 928 State.addLoc(CCValAssign::getMem( 929 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, 930 CCValAssign::Full)); 931 } 932 933 return false; 934 } 935 936 // Implements the RISC-V calling convention. Returns true upon failure. 937 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, 938 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 939 CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { 940 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 941 assert(XLen == 32 || XLen == 64); 942 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 943 944 // Any return value split in to more than two values can't be returned 945 // directly. 946 if (IsRet && ValNo > 1) 947 return true; 948 949 if (ValVT == MVT::f32) { 950 LocVT = XLenVT; 951 LocInfo = CCValAssign::BCvt; 952 } else if (XLen == 64 && ValVT == MVT::f64) { 953 LocVT = MVT::i64; 954 LocInfo = CCValAssign::BCvt; 955 } 956 957 // If this is a variadic argument, the RISC-V calling convention requires 958 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 959 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 960 // be used regardless of whether the original argument was split during 961 // legalisation or not. 
The argument will not be passed by registers if the 962 // original type is larger than 2*XLEN, so the register alignment rule does 963 // not apply. 964 unsigned TwoXLenInBytes = (2 * XLen) / 8; 965 if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && 966 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 967 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 968 // Skip 'odd' register if necessary. 969 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 970 State.AllocateReg(ArgGPRs); 971 } 972 973 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 974 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 975 State.getPendingArgFlags(); 976 977 assert(PendingLocs.size() == PendingArgFlags.size() && 978 "PendingLocs and PendingArgFlags out of sync"); 979 980 // Handle passing f64 on RV32D with a soft float ABI. 981 if (XLen == 32 && ValVT == MVT::f64) { 982 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 983 "Can't lower f64 if it is split"); 984 // Depending on available argument GPRS, f64 may be passed in a pair of 985 // GPRs, split between a GPR and the stack, or passed completely on the 986 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 987 // cases. 988 unsigned Reg = State.AllocateReg(ArgGPRs); 989 LocVT = MVT::i32; 990 if (!Reg) { 991 unsigned StackOffset = State.AllocateStack(8, 8); 992 State.addLoc( 993 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 994 return false; 995 } 996 if (!State.AllocateReg(ArgGPRs)) 997 State.AllocateStack(4, 4); 998 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 999 return false; 1000 } 1001 1002 // Split arguments might be passed indirectly, so keep track of the pending 1003 // values. 1004 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 1005 LocVT = XLenVT; 1006 LocInfo = CCValAssign::Indirect; 1007 PendingLocs.push_back( 1008 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 1009 PendingArgFlags.push_back(ArgFlags); 1010 if (!ArgFlags.isSplitEnd()) { 1011 return false; 1012 } 1013 } 1014 1015 // If the split argument only had two elements, it should be passed directly 1016 // in registers or on the stack. 1017 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 1018 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 1019 // Apply the normal calling convention rules to the first half of the 1020 // split argument. 1021 CCValAssign VA = PendingLocs[0]; 1022 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 1023 PendingLocs.clear(); 1024 PendingArgFlags.clear(); 1025 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 1026 ArgFlags); 1027 } 1028 1029 // Allocate to a register if possible, or else a stack slot. 1030 unsigned Reg = State.AllocateReg(ArgGPRs); 1031 unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); 1032 1033 // If we reach this point and PendingLocs is non-empty, we must be at the 1034 // end of a split argument that must be passed indirectly. 
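  // Illustrative example: an i256 argument on RV32 is split into eight i32
  // parts, so it takes this path; every part is marked Indirect and the
  // caller passes a single pointer to an in-memory copy instead.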
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI) const {
  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      break;
    }
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
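// For example, an f32 argument assigned to a GPR under the soft-float ABI
// arrives here as an XLen-sized integer (CCValAssign::BCvt) and is converted
// back to f32 by convertLocVTToValVT above.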
1134 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 1135 const CCValAssign &VA, const SDLoc &DL) { 1136 MachineFunction &MF = DAG.getMachineFunction(); 1137 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1138 EVT LocVT = VA.getLocVT(); 1139 SDValue Val; 1140 1141 unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1142 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1143 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1144 1145 if (VA.getLocInfo() == CCValAssign::Indirect) 1146 return Val; 1147 1148 return convertLocVTToValVT(DAG, Val, VA, DL); 1149 } 1150 1151 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 1152 const CCValAssign &VA, const SDLoc &DL) { 1153 EVT LocVT = VA.getLocVT(); 1154 1155 switch (VA.getLocInfo()) { 1156 default: 1157 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1158 case CCValAssign::Full: 1159 break; 1160 case CCValAssign::BCvt: 1161 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 1162 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 1163 break; 1164 } 1165 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 1166 break; 1167 } 1168 return Val; 1169 } 1170 1171 // The caller is responsible for loading the full value if the argument is 1172 // passed with CCValAssign::Indirect. 1173 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 1174 const CCValAssign &VA, const SDLoc &DL) { 1175 MachineFunction &MF = DAG.getMachineFunction(); 1176 MachineFrameInfo &MFI = MF.getFrameInfo(); 1177 EVT LocVT = VA.getLocVT(); 1178 EVT ValVT = VA.getValVT(); 1179 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 1180 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1181 VA.getLocMemOffset(), /*Immutable=*/true); 1182 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1183 SDValue Val; 1184 1185 ISD::LoadExtType ExtType; 1186 switch (VA.getLocInfo()) { 1187 default: 1188 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1189 case CCValAssign::Full: 1190 case CCValAssign::Indirect: 1191 case CCValAssign::BCvt: 1192 ExtType = ISD::NON_EXTLOAD; 1193 break; 1194 } 1195 Val = DAG.getExtLoad( 1196 ExtType, DL, LocVT, Chain, FIN, 1197 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 1198 return Val; 1199 } 1200 1201 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 1202 const CCValAssign &VA, const SDLoc &DL) { 1203 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 1204 "Unexpected VA"); 1205 MachineFunction &MF = DAG.getMachineFunction(); 1206 MachineFrameInfo &MFI = MF.getFrameInfo(); 1207 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1208 1209 if (VA.isMemLoc()) { 1210 // f64 is passed on the stack. 1211 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 1212 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 1213 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 1214 MachinePointerInfo::getFixedStack(MF, FI)); 1215 } 1216 1217 assert(VA.isRegLoc() && "Expected register VA assignment"); 1218 1219 unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1220 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 1221 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 1222 SDValue Hi; 1223 if (VA.getLocReg() == RISCV::X17) { 1224 // Second half of f64 is passed on the stack. 
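    // The high half lives at the very start of the caller's outgoing argument
    // area, hence the fixed object at offset 0.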
    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address).
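      // e.g. for i128 on RV32 this issues four i32 loads from ArgValue at
      // offsets 0, 4, 8 and 12.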
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      assert(Ins[i].PartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
                                 true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const unsigned Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
1381 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 1382 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 1383 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 1384 const SmallVector<CCValAssign, 16> &ArgLocs) const { 1385 1386 auto &Callee = CLI.Callee; 1387 auto CalleeCC = CLI.CallConv; 1388 auto IsVarArg = CLI.IsVarArg; 1389 auto &Outs = CLI.Outs; 1390 auto &Caller = MF.getFunction(); 1391 auto CallerCC = Caller.getCallingConv(); 1392 1393 // Do not tail call opt functions with "disable-tail-calls" attribute. 1394 if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") 1395 return false; 1396 1397 // Exception-handling functions need a special set of instructions to 1398 // indicate a return to the hardware. Tail-calling another function would 1399 // probably break this. 1400 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 1401 // should be expanded as new function attributes are introduced. 1402 if (Caller.hasFnAttribute("interrupt")) 1403 return false; 1404 1405 // Do not tail call opt functions with varargs. 1406 if (IsVarArg) 1407 return false; 1408 1409 // Do not tail call opt if the stack is used to pass parameters. 1410 if (CCInfo.getNextStackOffset() != 0) 1411 return false; 1412 1413 // Do not tail call opt if any parameters need to be passed indirectly. 1414 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 1415 // passed indirectly. So the address of the value will be passed in a 1416 // register, or if not available, then the address is put on the stack. In 1417 // order to pass indirectly, space on the stack often needs to be allocated 1418 // in order to store the value. In this case the CCInfo.getNextStackOffset() 1419 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 1420 // are passed CCValAssign::Indirect. 1421 for (auto &VA : ArgLocs) 1422 if (VA.getLocInfo() == CCValAssign::Indirect) 1423 return false; 1424 1425 // Do not tail call opt if either caller or callee uses struct return 1426 // semantics. 1427 auto IsCallerStructRet = Caller.hasStructRetAttr(); 1428 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 1429 if (IsCallerStructRet || IsCalleeStructRet) 1430 return false; 1431 1432 // Externally-defined functions with weak linkage should not be 1433 // tail-called. The behaviour of branch instructions in this situation (as 1434 // used for tail calls) is implementation-defined, so we cannot rely on the 1435 // linker replacing the tail call with a return. 1436 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1437 const GlobalValue *GV = G->getGlobal(); 1438 if (GV->hasExternalWeakLinkage()) 1439 return false; 1440 } 1441 1442 // The callee has to preserve all registers the caller needs to preserve. 1443 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1444 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 1445 if (CalleeCC != CallerCC) { 1446 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 1447 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 1448 return false; 1449 } 1450 1451 // Byval parameters hand the function a pointer directly into the stack area 1452 // we want to reuse during a tail call. Working around this *is* possible 1453 // but less efficient and uglier in LowerCall. 
1454 for (auto &Arg : Outs) 1455 if (Arg.Flags.isByVal()) 1456 return false; 1457 1458 return true; 1459 } 1460 1461 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 1462 // and output parameter nodes. 1463 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 1464 SmallVectorImpl<SDValue> &InVals) const { 1465 SelectionDAG &DAG = CLI.DAG; 1466 SDLoc &DL = CLI.DL; 1467 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1468 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1469 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1470 SDValue Chain = CLI.Chain; 1471 SDValue Callee = CLI.Callee; 1472 bool &IsTailCall = CLI.IsTailCall; 1473 CallingConv::ID CallConv = CLI.CallConv; 1474 bool IsVarArg = CLI.IsVarArg; 1475 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1476 MVT XLenVT = Subtarget.getXLenVT(); 1477 1478 MachineFunction &MF = DAG.getMachineFunction(); 1479 1480 // Analyze the operands of the call, assigning locations to each operand. 1481 SmallVector<CCValAssign, 16> ArgLocs; 1482 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1483 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 1484 1485 // Check if it's really possible to do a tail call. 1486 if (IsTailCall) 1487 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 1488 1489 if (IsTailCall) 1490 ++NumTailCalls; 1491 else if (CLI.CS && CLI.CS.isMustTailCall()) 1492 report_fatal_error("failed to perform tail call elimination on a call " 1493 "site marked musttail"); 1494 1495 // Get a count of how many bytes are to be pushed on the stack. 1496 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 1497 1498 // Create local copies for byval args 1499 SmallVector<SDValue, 8> ByValArgs; 1500 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 1501 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1502 if (!Flags.isByVal()) 1503 continue; 1504 1505 SDValue Arg = OutVals[i]; 1506 unsigned Size = Flags.getByValSize(); 1507 unsigned Align = Flags.getByValAlign(); 1508 1509 int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false); 1510 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1511 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 1512 1513 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, 1514 /*IsVolatile=*/false, 1515 /*AlwaysInline=*/false, 1516 IsTailCall, MachinePointerInfo(), 1517 MachinePointerInfo()); 1518 ByValArgs.push_back(FIPtr); 1519 } 1520 1521 if (!IsTailCall) 1522 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 1523 1524 // Copy argument values to their designated locations. 1525 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 1526 SmallVector<SDValue, 8> MemOpChains; 1527 SDValue StackPtr; 1528 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 1529 CCValAssign &VA = ArgLocs[i]; 1530 SDValue ArgValue = OutVals[i]; 1531 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1532 1533 // Handle passing f64 on RV32D with a soft float ABI as a special case. 1534 bool IsF64OnRV32DSoftABI = 1535 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 1536 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 1537 SDValue SplitF64 = DAG.getNode( 1538 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 1539 SDValue Lo = SplitF64.getValue(0); 1540 SDValue Hi = SplitF64.getValue(1); 1541 1542 unsigned RegLo = VA.getLocReg(); 1543 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 1544 1545 if (RegLo == RISCV::X17) { 1546 // Second half of f64 is passed on the stack. 
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        unsigned RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      assert(Outs[i].PartOffset == 0);
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++i;
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use the local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and so that the direct call can be matched by PseudoCALL.
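  // Note: PseudoCALL is later expanded by the MC layer into an auipc+jalr
  // pair (which linker relaxation may shrink to a single jal when the callee
  // is within range); that expansion is mentioned here only for context.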
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);

  // Copy all of the result registers out of their specified physreg.
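  // Note that an f64 returned under the RV32 soft-float ABI arrives split
  // across a0/a1 and is recombined with BuildPairF64 in the loop below.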
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    unsigned RetOpc;
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::URET_FLAG:
    return "RISCVISD::URET_FLAG";
  case RISCVISD::SRET_FLAG:
    return "RISCVISD::SRET_FLAG";
  case RISCVISD::MRET_FLAG:
    return "RISCVISD::MRET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
  case RISCVISD::TAIL:
    return "RISCVISD::TAIL";
  case RISCVISD::SLLW:
    return "RISCVISD::SLLW";
  case RISCVISD::SRAW:
    return "RISCVISD::SRAW";
  case RISCVISD::SRLW:
    return "RISCVISD::SRLW";
  case RISCVISD::DIVW:
    return "RISCVISD::DIVW";
  case RISCVISD::DIVUW:
    return "RISCVISD::DIVUW";
  case RISCVISD::REMUW:
    return "RISCVISD::REMUW";
  case RISCVISD::FMV_W_X_RV64:
    return "RISCVISD::FMV_W_X_RV64";
  case RISCVISD::FMV_X_ANYEXTW_RV64:
    return "RISCVISD::FMV_X_ANYEXTW_RV64";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
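  // For example (illustrative only; the operand names are hypothetical),
  // inline asm such as
  //   asm volatile("add %0, %1, %2" : "=r"(res) : "r"(lhs), "r"(rhs));
  // has each "r" operand mapped to the GPR register class below.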
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating-point operations cannot be used in an lr/sc sequence without
  // breaking the forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt,
    AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}