//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, this is used in contexts where failure
  // is an option and has an alternative plan. In other contexts, this must
  // succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

#ifndef NDEBUG
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
#endif

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ?
"FP" : "BP") << " to " 94 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane 95 << '\n'); 96 return; 97 } 98 99 // 2: Next, try to save the FP/BP in an unused SGPR. 100 TempSGPR = findScratchNonCalleeSaveRegister( 101 MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); 102 103 if (!TempSGPR) { 104 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, 105 TargetStackID::SGPRSpill); 106 107 if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { 108 // 3: There's no free lane to spill, and no free register to save FP/BP, 109 // so we're forced to spill another VGPR to use for the spill. 110 FrameIndex = NewFI; 111 112 LLVM_DEBUG( 113 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); 114 dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " 115 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); 116 } else { 117 // Remove dead <NewFI> index 118 MF.getFrameInfo().RemoveStackObject(NewFI); 119 // 4: If all else fails, spill the FP/BP to memory. 120 FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); 121 LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " 122 << (IsFP ? "FP" : "BP") << '\n'); 123 } 124 } else { 125 LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to " 126 << printReg(TempSGPR, TRI) << '\n'); 127 } 128 } 129 130 // We need to specially emit stack operations here because a different frame 131 // register is used than in the rest of the function, as getFrameRegister would 132 // use. 133 static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, 134 MachineBasicBlock &MBB, 135 MachineBasicBlock::iterator I, 136 const SIInstrInfo *TII, Register SpillReg, 137 Register ScratchRsrcReg, Register SPReg, int FI) { 138 MachineFunction *MF = MBB.getParent(); 139 MachineFrameInfo &MFI = MF->getFrameInfo(); 140 141 int64_t Offset = MFI.getObjectOffset(FI); 142 143 MachineMemOperand *MMO = MF->getMachineMemOperand( 144 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4, 145 MFI.getObjectAlign(FI)); 146 147 if (ST.enableFlatScratch()) { 148 if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 149 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR)) 150 .addReg(SpillReg, RegState::Kill) 151 .addReg(SPReg) 152 .addImm(Offset) 153 .addImm(0) // glc 154 .addImm(0) // slc 155 .addImm(0) // dlc 156 .addMemOperand(MMO); 157 return; 158 } 159 } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) { 160 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET)) 161 .addReg(SpillReg, RegState::Kill) 162 .addReg(ScratchRsrcReg) 163 .addReg(SPReg) 164 .addImm(Offset) 165 .addImm(0) // glc 166 .addImm(0) // slc 167 .addImm(0) // tfe 168 .addImm(0) // dlc 169 .addImm(0) // swz 170 .addMemOperand(MMO); 171 return; 172 } 173 174 // Don't clobber the TmpVGPR if we also need a scratch reg for the stack 175 // offset in the spill. 
176 LiveRegs.addReg(SpillReg); 177 178 if (ST.enableFlatScratch()) { 179 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 180 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 181 182 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 183 .addReg(SPReg) 184 .addImm(Offset); 185 186 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR)) 187 .addReg(SpillReg, RegState::Kill) 188 .addReg(OffsetReg, RegState::Kill) 189 .addImm(0) 190 .addImm(0) // glc 191 .addImm(0) // slc 192 .addImm(0) // dlc 193 .addMemOperand(MMO); 194 } else { 195 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 196 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 197 198 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 199 .addImm(Offset); 200 201 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN)) 202 .addReg(SpillReg, RegState::Kill) 203 .addReg(OffsetReg, RegState::Kill) 204 .addReg(ScratchRsrcReg) 205 .addReg(SPReg) 206 .addImm(0) 207 .addImm(0) // glc 208 .addImm(0) // slc 209 .addImm(0) // tfe 210 .addImm(0) // dlc 211 .addImm(0) // swz 212 .addMemOperand(MMO); 213 } 214 215 LiveRegs.removeReg(SpillReg); 216 } 217 218 static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, 219 MachineBasicBlock &MBB, 220 MachineBasicBlock::iterator I, 221 const SIInstrInfo *TII, Register SpillReg, 222 Register ScratchRsrcReg, Register SPReg, int FI) { 223 MachineFunction *MF = MBB.getParent(); 224 MachineFrameInfo &MFI = MF->getFrameInfo(); 225 int64_t Offset = MFI.getObjectOffset(FI); 226 227 MachineMemOperand *MMO = MF->getMachineMemOperand( 228 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4, 229 MFI.getObjectAlign(FI)); 230 231 if (ST.enableFlatScratch()) { 232 if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 233 BuildMI(MBB, I, DebugLoc(), 234 TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg) 235 .addReg(SPReg) 236 .addImm(Offset) 237 .addImm(0) // glc 238 .addImm(0) // slc 239 .addImm(0) // dlc 240 .addMemOperand(MMO); 241 return; 242 } 243 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 244 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 245 246 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 247 .addReg(SPReg) 248 .addImm(Offset); 249 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), 250 SpillReg) 251 .addReg(OffsetReg, RegState::Kill) 252 .addImm(0) 253 .addImm(0) // glc 254 .addImm(0) // slc 255 .addImm(0) // dlc 256 .addMemOperand(MMO); 257 return; 258 } 259 260 if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) { 261 BuildMI(MBB, I, DebugLoc(), 262 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg) 263 .addReg(ScratchRsrcReg) 264 .addReg(SPReg) 265 .addImm(Offset) 266 .addImm(0) // glc 267 .addImm(0) // slc 268 .addImm(0) // tfe 269 .addImm(0) // dlc 270 .addImm(0) // swz 271 .addMemOperand(MMO); 272 return; 273 } 274 275 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 276 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 277 278 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 279 .addImm(Offset); 280 281 BuildMI(MBB, I, DebugLoc(), 282 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg) 283 .addReg(OffsetReg, RegState::Kill) 284 .addReg(ScratchRsrcReg) 285 .addReg(SPReg) 286 .addImm(0) 287 .addImm(0) // glc 288 .addImm(0) // slc 289 .addImm(0) // tfe 290 .addImm(0) // dlc 291 .addImm(0) // swz 292 .addMemOperand(MMO); 293 } 294 295 static void 
buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 296 const DebugLoc &DL, const SIInstrInfo *TII, 297 Register TargetReg) { 298 MachineFunction *MF = MBB.getParent(); 299 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 300 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 301 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 302 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); 303 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); 304 305 if (MFI->getGITPtrHigh() != 0xffffffff) { 306 BuildMI(MBB, I, DL, SMovB32, TargetHi) 307 .addImm(MFI->getGITPtrHigh()) 308 .addReg(TargetReg, RegState::ImplicitDefine); 309 } else { 310 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); 311 BuildMI(MBB, I, DL, GetPC64, TargetReg); 312 } 313 Register GitPtrLo = MFI->getGITPtrLoReg(*MF); 314 MF->getRegInfo().addLiveIn(GitPtrLo); 315 MBB.addLiveIn(GitPtrLo); 316 BuildMI(MBB, I, DL, SMovB32, TargetLo) 317 .addReg(GitPtrLo); 318 } 319 320 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` 321 void SIFrameLowering::emitEntryFunctionFlatScratchInit( 322 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 323 const DebugLoc &DL, Register ScratchWaveOffsetReg) const { 324 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 325 const SIInstrInfo *TII = ST.getInstrInfo(); 326 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 327 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 328 329 // We don't need this if we only have spills since there is no user facing 330 // scratch. 331 332 // TODO: If we know we don't have flat instructions earlier, we can omit 333 // this from the input registers. 334 // 335 // TODO: We only need to know if we access scratch space through a flat 336 // pointer. Because we only detect if flat instructions are used at all, 337 // this will be used more often than necessary on VI. 338 339 Register FlatScrInitLo; 340 Register FlatScrInitHi; 341 342 if (ST.isAmdPalOS()) { 343 // Extract the scratch offset from the descriptor in the GIT 344 LivePhysRegs LiveRegs; 345 LiveRegs.init(*TRI); 346 LiveRegs.addLiveIns(MBB); 347 348 // Find unused reg to load flat scratch init into 349 MachineRegisterInfo &MRI = MF.getRegInfo(); 350 Register FlatScrInit = AMDGPU::NoRegister; 351 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); 352 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; 353 AllSGPR64s = AllSGPR64s.slice( 354 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); 355 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 356 for (MCPhysReg Reg : AllSGPR64s) { 357 if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) && 358 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 359 FlatScrInit = Reg; 360 break; 361 } 362 } 363 assert(FlatScrInit && "Failed to find free register for scratch init"); 364 365 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); 366 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); 367 368 buildGitPtr(MBB, I, DL, TII, FlatScrInit); 369 370 // We now have the GIT ptr - now get the scratch descriptor from the entry 371 // at offset 0 (or offset 16 for a compute shader). 
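    // Only the first two dwords of the descriptor are needed here: they hold
    // the scratch base address. The upper bits of the second dword carry other
    // resource fields, which is why the high half is masked down to [47:0]
    // below.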
372 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 373 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 374 auto *MMO = MF.getMachineMemOperand( 375 PtrInfo, 376 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 377 MachineMemOperand::MODereferenceable, 378 8, Align(4)); 379 unsigned Offset = 380 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; 381 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 382 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 383 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) 384 .addReg(FlatScrInit) 385 .addImm(EncodedOffset) // offset 386 .addImm(0) // glc 387 .addImm(0) // dlc 388 .addMemOperand(MMO); 389 390 // Mask the offset in [47:0] of the descriptor 391 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); 392 BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) 393 .addReg(FlatScrInitHi) 394 .addImm(0xffff); 395 } else { 396 Register FlatScratchInitReg = 397 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); 398 assert(FlatScratchInitReg); 399 400 MachineRegisterInfo &MRI = MF.getRegInfo(); 401 MRI.addLiveIn(FlatScratchInitReg); 402 MBB.addLiveIn(FlatScratchInitReg); 403 404 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); 405 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); 406 } 407 408 // Do a 64-bit pointer add. 409 if (ST.flatScratchIsPointer()) { 410 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 411 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 412 .addReg(FlatScrInitLo) 413 .addReg(ScratchWaveOffsetReg); 414 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) 415 .addReg(FlatScrInitHi) 416 .addImm(0); 417 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 418 addReg(FlatScrInitLo). 419 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | 420 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 421 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 422 addReg(FlatScrInitHi). 423 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | 424 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 425 return; 426 } 427 428 // For GFX9. 429 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) 430 .addReg(FlatScrInitLo) 431 .addReg(ScratchWaveOffsetReg); 432 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) 433 .addReg(FlatScrInitHi) 434 .addImm(0); 435 436 return; 437 } 438 439 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9); 440 441 // Copy the size in bytes. 442 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) 443 .addReg(FlatScrInitHi, RegState::Kill); 444 445 // Add wave offset in bytes to private base offset. 446 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. 447 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 448 .addReg(FlatScrInitLo) 449 .addReg(ScratchWaveOffsetReg); 450 451 // Convert offset to 256-byte units. 452 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) 453 .addReg(FlatScrInitLo, RegState::Kill) 454 .addImm(8); 455 } 456 457 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not 458 // memory. They should have been removed by now. 459 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { 460 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 461 I != E; ++I) { 462 if (!MFI.isDeadObjectIndex(I)) 463 return false; 464 } 465 466 return true; 467 } 468 469 // Shift down registers reserved for the scratch RSRC. 
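// Returns the register now holding the scratch RSRC, or Register() when the
// RSRC is provably unused (no prior use of the physreg and no live stack
// objects).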
470 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( 471 MachineFunction &MF) const { 472 473 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 474 const SIInstrInfo *TII = ST.getInstrInfo(); 475 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 476 MachineRegisterInfo &MRI = MF.getRegInfo(); 477 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 478 479 assert(MFI->isEntryFunction()); 480 481 Register ScratchRsrcReg = MFI->getScratchRSrcReg(); 482 483 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && 484 allStackObjectsAreDead(MF.getFrameInfo()))) 485 return Register(); 486 487 if (ST.hasSGPRInitBug() || 488 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) 489 return ScratchRsrcReg; 490 491 // We reserved the last registers for this. Shift it down to the end of those 492 // which were actually used. 493 // 494 // FIXME: It might be safer to use a pseudoregister before replacement. 495 496 // FIXME: We should be able to eliminate unused input registers. We only 497 // cannot do this for the resources required for scratch access. For now we 498 // skip over user SGPRs and may leave unused holes. 499 500 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; 501 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); 502 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); 503 504 // Skip the last N reserved elements because they should have already been 505 // reserved for VCC etc. 506 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 507 for (MCPhysReg Reg : AllSGPR128s) { 508 // Pick the first unallocated one. Make sure we don't clobber the other 509 // reserved input we needed. Also for PAL, make sure we don't clobber 510 // the GIT pointer passed in SGPR0 or SGPR8. 511 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 512 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 513 MRI.replaceRegWith(ScratchRsrcReg, Reg); 514 MFI->setScratchRSrcReg(Reg); 515 return Reg; 516 } 517 } 518 519 return ScratchRsrcReg; 520 } 521 522 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) { 523 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize(); 524 } 525 526 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, 527 MachineBasicBlock &MBB) const { 528 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); 529 530 // FIXME: If we only have SGPR spills, we won't actually be using scratch 531 // memory since these spill to VGPRs. We should be cleaning up these unused 532 // SGPR spill frame indices somewhere. 533 534 // FIXME: We still have implicit uses on SGPR spill instructions in case they 535 // need to spill to vector memory. It's likely that will not happen, but at 536 // this point it appears we need the setup. This part of the prolog should be 537 // emitted after frame indices are eliminated. 538 539 // FIXME: Remove all of the isPhysRegUsed checks 540 541 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 542 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 543 const SIInstrInfo *TII = ST.getInstrInfo(); 544 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 545 MachineRegisterInfo &MRI = MF.getRegInfo(); 546 const Function &F = MF.getFunction(); 547 548 assert(MFI->isEntryFunction()); 549 550 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( 551 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); 552 // FIXME: Hack to not crash in situations which emitted an error. 
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found conflicts with the
  // scratch wave offset, which may be in a fixed SGPR or a free SGPR chosen
  // by SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset
  // to a free SGPR.
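  // The destination is searched for past the preloaded SGPRs and must not
  // alias the SRSRC or the PAL GIT pointer register.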
599 Register ScratchWaveOffsetReg; 600 if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { 601 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); 602 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); 603 AllSGPRs = AllSGPRs.slice( 604 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded)); 605 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 606 for (MCPhysReg Reg : AllSGPRs) { 607 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 608 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { 609 ScratchWaveOffsetReg = Reg; 610 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) 611 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); 612 break; 613 } 614 } 615 } else { 616 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; 617 } 618 assert(ScratchWaveOffsetReg); 619 620 if (requiresStackPointerReference(MF)) { 621 Register SPReg = MFI->getStackPtrOffsetReg(); 622 assert(SPReg != AMDGPU::SP_REG); 623 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) 624 .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST)); 625 } 626 627 if (hasFP(MF)) { 628 Register FPReg = MFI->getFrameOffsetReg(); 629 assert(FPReg != AMDGPU::FP_REG); 630 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); 631 } 632 633 if (MFI->hasFlatScratchInit() || ScratchRsrcReg) { 634 MRI.addLiveIn(PreloadedScratchWaveOffsetReg); 635 MBB.addLiveIn(PreloadedScratchWaveOffsetReg); 636 } 637 638 if (MFI->hasFlatScratchInit()) { 639 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg); 640 } 641 642 if (ScratchRsrcReg) { 643 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL, 644 PreloadedScratchRsrcReg, 645 ScratchRsrcReg, ScratchWaveOffsetReg); 646 } 647 } 648 649 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg` 650 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup( 651 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 652 const DebugLoc &DL, Register PreloadedScratchRsrcReg, 653 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const { 654 655 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 656 const SIInstrInfo *TII = ST.getInstrInfo(); 657 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 658 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 659 const Function &Fn = MF.getFunction(); 660 661 if (ST.isAmdPalOS()) { 662 // The pointer to the GIT is formed from the offset passed in and either 663 // the amdgpu-git-ptr-high function attribute or the top part of the PC 664 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 665 666 buildGitPtr(MBB, I, DL, TII, Rsrc01); 667 668 // We now have the GIT ptr - now get the scratch descriptor from the entry 669 // at offset 0 (or offset 16 for a compute shader). 670 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 671 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); 672 auto MMO = MF.getMachineMemOperand(PtrInfo, 673 MachineMemOperand::MOLoad | 674 MachineMemOperand::MOInvariant | 675 MachineMemOperand::MODereferenceable, 676 16, Align(4)); 677 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 
16 : 0; 678 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 679 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 680 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) 681 .addReg(Rsrc01) 682 .addImm(EncodedOffset) // offset 683 .addImm(0) // glc 684 .addImm(0) // dlc 685 .addReg(ScratchRsrcReg, RegState::ImplicitDefine) 686 .addMemOperand(MMO); 687 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) { 688 assert(!ST.isAmdHsaOrMesa(Fn)); 689 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 690 691 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); 692 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); 693 694 // Use relocations to get the pointer, and setup the other bits manually. 695 uint64_t Rsrc23 = TII->getScratchRsrcWords23(); 696 697 if (MFI->hasImplicitBufferPtr()) { 698 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 699 700 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { 701 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); 702 703 BuildMI(MBB, I, DL, Mov64, Rsrc01) 704 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 705 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 706 } else { 707 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 708 709 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 710 auto MMO = MF.getMachineMemOperand( 711 PtrInfo, 712 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 713 MachineMemOperand::MODereferenceable, 714 8, Align(4)); 715 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) 716 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 717 .addImm(0) // offset 718 .addImm(0) // glc 719 .addImm(0) // dlc 720 .addMemOperand(MMO) 721 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 722 723 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 724 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 725 } 726 } else { 727 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 728 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 729 730 BuildMI(MBB, I, DL, SMovB32, Rsrc0) 731 .addExternalSymbol("SCRATCH_RSRC_DWORD0") 732 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 733 734 BuildMI(MBB, I, DL, SMovB32, Rsrc1) 735 .addExternalSymbol("SCRATCH_RSRC_DWORD1") 736 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 737 738 } 739 740 BuildMI(MBB, I, DL, SMovB32, Rsrc2) 741 .addImm(Rsrc23 & 0xffffffff) 742 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 743 744 BuildMI(MBB, I, DL, SMovB32, Rsrc3) 745 .addImm(Rsrc23 >> 32) 746 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 747 } else if (ST.isAmdHsaOrMesa(Fn)) { 748 assert(PreloadedScratchRsrcReg); 749 750 if (ScratchRsrcReg != PreloadedScratchRsrcReg) { 751 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) 752 .addReg(PreloadedScratchRsrcReg, RegState::Kill); 753 } 754 } 755 756 // Add the scratch wave offset into the scratch RSRC. 757 // 758 // We only want to update the first 48 bits, which is the base address 759 // pointer, without touching the adjacent 16 bits of flags. We know this add 760 // cannot carry-out from bit 47, otherwise the scratch allocation would be 761 // impossible to fit in the 48-bit global address space. 762 // 763 // TODO: Evaluate if it is better to just construct an SRD using the flat 764 // scratch init and some constants rather than update the one we are passed. 
765 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 766 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 767 768 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in 769 // the kernel body via inreg arguments. 770 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) 771 .addReg(ScratchRsrcSub0) 772 .addReg(ScratchWaveOffsetReg) 773 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 774 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) 775 .addReg(ScratchRsrcSub1) 776 .addImm(0) 777 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 778 } 779 780 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { 781 switch (ID) { 782 case TargetStackID::Default: 783 case TargetStackID::NoAlloc: 784 case TargetStackID::SGPRSpill: 785 return true; 786 case TargetStackID::SVEVector: 787 return false; 788 } 789 llvm_unreachable("Invalid TargetStackID::Value"); 790 } 791 792 // Activate all lanes, returns saved exec. 793 static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, 794 MachineFunction &MF, 795 MachineBasicBlock &MBB, 796 MachineBasicBlock::iterator MBBI, 797 bool IsProlog) { 798 Register ScratchExecCopy; 799 MachineRegisterInfo &MRI = MF.getRegInfo(); 800 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 801 const SIInstrInfo *TII = ST.getInstrInfo(); 802 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 803 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 804 DebugLoc DL; 805 806 if (LiveRegs.empty()) { 807 if (IsProlog) { 808 LiveRegs.init(TRI); 809 LiveRegs.addLiveIns(MBB); 810 if (FuncInfo->SGPRForFPSaveRestoreCopy) 811 LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy); 812 813 if (FuncInfo->SGPRForBPSaveRestoreCopy) 814 LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy); 815 } else { 816 // In epilog. 817 LiveRegs.init(*ST.getRegisterInfo()); 818 LiveRegs.addLiveOuts(MBB); 819 LiveRegs.stepBackward(*MBBI); 820 } 821 } 822 823 ScratchExecCopy = findScratchNonCalleeSaveRegister( 824 MRI, LiveRegs, *TRI.getWaveMaskRegClass()); 825 826 if (!IsProlog) 827 LiveRegs.removeReg(ScratchExecCopy); 828 829 const unsigned OrSaveExec = 830 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; 831 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1); 832 833 return ScratchExecCopy; 834 } 835 836 void SIFrameLowering::emitPrologue(MachineFunction &MF, 837 MachineBasicBlock &MBB) const { 838 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 839 if (FuncInfo->isEntryFunction()) { 840 emitEntryFunctionPrologue(MF, MBB); 841 return; 842 } 843 844 const MachineFrameInfo &MFI = MF.getFrameInfo(); 845 MachineRegisterInfo &MRI = MF.getRegInfo(); 846 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 847 const SIInstrInfo *TII = ST.getInstrInfo(); 848 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 849 850 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 851 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 852 Register BasePtrReg = 853 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); 854 LivePhysRegs LiveRegs; 855 856 MachineBasicBlock::iterator MBBI = MBB.begin(); 857 DebugLoc DL; 858 859 bool HasFP = false; 860 bool HasBP = false; 861 uint32_t NumBytes = MFI.getStackSize(); 862 uint32_t RoundedSize = NumBytes; 863 // To avoid clobbering VGPRs in lanes that weren't active on function entry, 864 // turn on all lanes before doing the spill to memory. 
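  // The memory spills below are therefore bracketed by an exec save/restore,
  // e.g. (wave64):
  //   s_or_saveexec_b64 s[N:N+1], -1    ; save exec, enable all lanes
  //   buffer_store_dword ...            ; spill CSR VGPRs / FP / BP copies
  //   s_mov_b64 exec, s[N:N+1]          ; restore the entry exec mask
  // ScratchExecCopy stays unset until the first such spill is emitted.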
865 Register ScratchExecCopy; 866 867 bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue(); 868 bool SpillFPToMemory = false; 869 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. 870 // Otherwise we are spilling the FP to memory. 871 if (HasFPSaveIndex) { 872 SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) != 873 TargetStackID::SGPRSpill; 874 } 875 876 bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue(); 877 bool SpillBPToMemory = false; 878 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. 879 // Otherwise we are spilling the BP to memory. 880 if (HasBPSaveIndex) { 881 SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) != 882 TargetStackID::SGPRSpill; 883 } 884 885 // Emit the copy if we need an FP, and are using a free SGPR to save it. 886 if (FuncInfo->SGPRForFPSaveRestoreCopy) { 887 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy) 888 .addReg(FramePtrReg) 889 .setMIFlag(MachineInstr::FrameSetup); 890 } 891 892 // Emit the copy if we need a BP, and are using a free SGPR to save it. 893 if (FuncInfo->SGPRForBPSaveRestoreCopy) { 894 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), 895 FuncInfo->SGPRForBPSaveRestoreCopy) 896 .addReg(BasePtrReg) 897 .setMIFlag(MachineInstr::FrameSetup); 898 } 899 900 // If a copy has been emitted for FP and/or BP, Make the SGPRs 901 // used in the copy instructions live throughout the function. 902 SmallVector<MCPhysReg, 2> TempSGPRs; 903 if (FuncInfo->SGPRForFPSaveRestoreCopy) 904 TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); 905 906 if (FuncInfo->SGPRForBPSaveRestoreCopy) 907 TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); 908 909 if (!TempSGPRs.empty()) { 910 for (MachineBasicBlock &MBB : MF) { 911 for (MCPhysReg Reg : TempSGPRs) 912 MBB.addLiveIn(Reg); 913 914 MBB.sortUniqueLiveIns(); 915 } 916 } 917 918 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg 919 : FuncInfo->getSGPRSpillVGPRs()) { 920 if (!Reg.FI.hasValue()) 921 continue; 922 923 if (!ScratchExecCopy) 924 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 925 926 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR, 927 FuncInfo->getScratchRSrcReg(), 928 StackPtrReg, 929 Reg.FI.getValue()); 930 } 931 932 if (HasFPSaveIndex && SpillFPToMemory) { 933 assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue())); 934 935 if (!ScratchExecCopy) 936 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 937 938 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 939 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 940 941 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 942 .addReg(FramePtrReg); 943 944 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR, 945 FuncInfo->getScratchRSrcReg(), StackPtrReg, 946 FuncInfo->FramePointerSaveIndex.getValue()); 947 } 948 949 if (HasBPSaveIndex && SpillBPToMemory) { 950 assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex)); 951 952 if (!ScratchExecCopy) 953 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 954 955 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 956 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 957 958 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 959 .addReg(BasePtrReg); 960 961 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR, 962 FuncInfo->getScratchRSrcReg(), StackPtrReg, 963 *FuncInfo->BasePointerSaveIndex); 964 } 965 966 if (ScratchExecCopy) { 967 // FIXME: 
Split block and make terminator. 968 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 969 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 970 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 971 .addReg(ScratchExecCopy, RegState::Kill); 972 LiveRegs.addReg(ScratchExecCopy); 973 } 974 975 // In this case, spill the FP to a reserved VGPR. 976 if (HasFPSaveIndex && !SpillFPToMemory) { 977 const int FI = FuncInfo->FramePointerSaveIndex.getValue(); 978 assert(!MFI.isDeadObjectIndex(FI)); 979 980 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 981 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 982 FuncInfo->getSGPRToVGPRSpills(FI); 983 assert(Spill.size() == 1); 984 985 // Save FP before setting it up. 986 // FIXME: This should respect spillSGPRToVGPR; 987 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 988 .addReg(FramePtrReg) 989 .addImm(Spill[0].Lane) 990 .addReg(Spill[0].VGPR, RegState::Undef); 991 } 992 993 // In this case, spill the BP to a reserved VGPR. 994 if (HasBPSaveIndex && !SpillBPToMemory) { 995 const int BasePtrFI = *FuncInfo->BasePointerSaveIndex; 996 assert(!MFI.isDeadObjectIndex(BasePtrFI)); 997 998 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 999 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1000 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1001 assert(Spill.size() == 1); 1002 1003 // Save BP before setting it up. 1004 // FIXME: This should respect spillSGPRToVGPR; 1005 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 1006 .addReg(BasePtrReg) 1007 .addImm(Spill[0].Lane) 1008 .addReg(Spill[0].VGPR, RegState::Undef); 1009 } 1010 1011 if (TRI.needsStackRealignment(MF)) { 1012 HasFP = true; 1013 const unsigned Alignment = MFI.getMaxAlign().value(); 1014 1015 RoundedSize += Alignment; 1016 if (LiveRegs.empty()) { 1017 LiveRegs.init(TRI); 1018 LiveRegs.addLiveIns(MBB); 1019 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1020 LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1021 } 1022 1023 Register ScratchSPReg = findScratchNonCalleeSaveRegister( 1024 MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass); 1025 assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy && 1026 ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy); 1027 1028 // s_add_u32 tmp_reg, s32, NumBytes 1029 // s_and_b32 s32, tmp_reg, 0b111...0000 1030 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg) 1031 .addReg(StackPtrReg) 1032 .addImm((Alignment - 1) * getScratchScaleFactor(ST)) 1033 .setMIFlag(MachineInstr::FrameSetup); 1034 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) 1035 .addReg(ScratchSPReg, RegState::Kill) 1036 .addImm(-Alignment * getScratchScaleFactor(ST)) 1037 .setMIFlag(MachineInstr::FrameSetup); 1038 FuncInfo->setIsStackRealigned(true); 1039 } else if ((HasFP = hasFP(MF))) { 1040 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1041 .addReg(StackPtrReg) 1042 .setMIFlag(MachineInstr::FrameSetup); 1043 } 1044 1045 // If we need a base pointer, set it up here. It's whatever the value of 1046 // the stack pointer is at this point. Any variable size objects will be 1047 // allocated after this, so we can still use the base pointer to reference 1048 // the incoming arguments. 
1049 if ((HasBP = TRI.hasBasePointer(MF))) { 1050 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 1051 .addReg(StackPtrReg) 1052 .setMIFlag(MachineInstr::FrameSetup); 1053 } 1054 1055 if (HasFP && RoundedSize != 0) { 1056 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) 1057 .addReg(StackPtrReg) 1058 .addImm(RoundedSize * getScratchScaleFactor(ST)) 1059 .setMIFlag(MachineInstr::FrameSetup); 1060 } 1061 1062 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || 1063 FuncInfo->FramePointerSaveIndex)) && 1064 "Needed to save FP but didn't save it anywhere"); 1065 1066 assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && 1067 !FuncInfo->FramePointerSaveIndex)) && 1068 "Saved FP but didn't need it"); 1069 1070 assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || 1071 FuncInfo->BasePointerSaveIndex)) && 1072 "Needed to save BP but didn't save it anywhere"); 1073 1074 assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && 1075 !FuncInfo->BasePointerSaveIndex)) && 1076 "Saved BP but didn't need it"); 1077 } 1078 1079 void SIFrameLowering::emitEpilogue(MachineFunction &MF, 1080 MachineBasicBlock &MBB) const { 1081 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1082 if (FuncInfo->isEntryFunction()) 1083 return; 1084 1085 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1086 const SIInstrInfo *TII = ST.getInstrInfo(); 1087 MachineRegisterInfo &MRI = MF.getRegInfo(); 1088 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 1089 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1090 LivePhysRegs LiveRegs; 1091 DebugLoc DL; 1092 1093 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1094 uint32_t NumBytes = MFI.getStackSize(); 1095 uint32_t RoundedSize = FuncInfo->isStackRealigned() 1096 ? NumBytes + MFI.getMaxAlign().value() 1097 : NumBytes; 1098 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 1099 const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1100 const Register BasePtrReg = 1101 TRI.hasBasePointer(MF) ? 
TRI.getBaseRegister() : Register(); 1102 1103 bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue(); 1104 bool SpillFPToMemory = false; 1105 if (HasFPSaveIndex) { 1106 SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) != 1107 TargetStackID::SGPRSpill; 1108 } 1109 1110 bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue(); 1111 bool SpillBPToMemory = false; 1112 if (HasBPSaveIndex) { 1113 SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) != 1114 TargetStackID::SGPRSpill; 1115 } 1116 1117 if (RoundedSize != 0 && hasFP(MF)) { 1118 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) 1119 .addReg(StackPtrReg) 1120 .addImm(RoundedSize * getScratchScaleFactor(ST)) 1121 .setMIFlag(MachineInstr::FrameDestroy); 1122 } 1123 1124 if (FuncInfo->SGPRForFPSaveRestoreCopy) { 1125 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1126 .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) 1127 .setMIFlag(MachineInstr::FrameSetup); 1128 } 1129 1130 if (FuncInfo->SGPRForBPSaveRestoreCopy) { 1131 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 1132 .addReg(FuncInfo->SGPRForBPSaveRestoreCopy) 1133 .setMIFlag(MachineInstr::FrameSetup); 1134 } 1135 1136 Register ScratchExecCopy; 1137 if (HasFPSaveIndex) { 1138 const int FI = FuncInfo->FramePointerSaveIndex.getValue(); 1139 assert(!MFI.isDeadObjectIndex(FI)); 1140 if (SpillFPToMemory) { 1141 if (!ScratchExecCopy) 1142 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1143 1144 MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister( 1145 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 1146 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR, 1147 FuncInfo->getScratchRSrcReg(), StackPtrReg, FI); 1148 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) 1149 .addReg(TempVGPR, RegState::Kill); 1150 } else { 1151 // Reload from VGPR spill. 1152 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 1153 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1154 FuncInfo->getSGPRToVGPRSpills(FI); 1155 assert(Spill.size() == 1); 1156 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) 1157 .addReg(Spill[0].VGPR) 1158 .addImm(Spill[0].Lane); 1159 } 1160 } 1161 1162 if (HasBPSaveIndex) { 1163 const int BasePtrFI = *FuncInfo->BasePointerSaveIndex; 1164 assert(!MFI.isDeadObjectIndex(BasePtrFI)); 1165 if (SpillBPToMemory) { 1166 if (!ScratchExecCopy) 1167 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1168 1169 MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister( 1170 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 1171 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR, 1172 FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI); 1173 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) 1174 .addReg(TempVGPR, RegState::Kill); 1175 } else { 1176 // Reload from VGPR spill. 
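      // The BP was written to a single lane of a reserved VGPR in the
      // prologue, so a V_READLANE_B32 from that lane restores it directly.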
1177 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 1178 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1179 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1180 assert(Spill.size() == 1); 1181 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) 1182 .addReg(Spill[0].VGPR) 1183 .addImm(Spill[0].Lane); 1184 } 1185 } 1186 1187 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg : 1188 FuncInfo->getSGPRSpillVGPRs()) { 1189 if (!Reg.FI.hasValue()) 1190 continue; 1191 1192 if (!ScratchExecCopy) 1193 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1194 1195 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR, 1196 FuncInfo->getScratchRSrcReg(), StackPtrReg, 1197 Reg.FI.getValue()); 1198 } 1199 1200 if (ScratchExecCopy) { 1201 // FIXME: Split block and make terminator. 1202 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 1203 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 1204 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 1205 .addReg(ScratchExecCopy, RegState::Kill); 1206 } 1207 } 1208 1209 #ifndef NDEBUG 1210 static bool allSGPRSpillsAreDead(const MachineFunction &MF) { 1211 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1212 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1213 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 1214 I != E; ++I) { 1215 if (!MFI.isDeadObjectIndex(I) && 1216 MFI.getStackID(I) == TargetStackID::SGPRSpill && 1217 (I != FuncInfo->FramePointerSaveIndex && 1218 I != FuncInfo->BasePointerSaveIndex)) { 1219 return false; 1220 } 1221 } 1222 1223 return true; 1224 } 1225 #endif 1226 1227 StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, 1228 int FI, 1229 Register &FrameReg) const { 1230 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 1231 1232 FrameReg = RI->getFrameRegister(MF); 1233 return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); 1234 } 1235 1236 void SIFrameLowering::processFunctionBeforeFrameFinalized( 1237 MachineFunction &MF, 1238 RegScavenger *RS) const { 1239 MachineFrameInfo &MFI = MF.getFrameInfo(); 1240 1241 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1242 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1243 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1244 1245 FuncInfo->removeDeadFrameIndices(MFI); 1246 assert(allSGPRSpillsAreDead(MF) && 1247 "SGPR spill should have been removed in SILowerSGPRSpills"); 1248 1249 // FIXME: The other checks should be redundant with allStackObjectsAreDead, 1250 // but currently hasNonSpillStackObjects is set only from source 1251 // allocas. Stack temps produced from legalization are not counted currently. 1252 if (!allStackObjectsAreDead(MFI)) { 1253 assert(RS && "RegScavenger required if spilling"); 1254 1255 if (FuncInfo->isEntryFunction()) { 1256 int ScavengeFI = MFI.CreateFixedObject( 1257 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); 1258 RS->addScavengingFrameIndex(ScavengeFI); 1259 } else { 1260 int ScavengeFI = MFI.CreateStackObject( 1261 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 1262 TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false); 1263 RS->addScavengingFrameIndex(ScavengeFI); 1264 } 1265 } 1266 } 1267 1268 // Only report VGPRs to generic code. 
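// SGPR callee saves are handled separately by determineCalleeSavesSGPR(); this
// hook also decides how the FP/BP will be saved, via
// getVGPRSpillLaneOrTempRegister().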
1269 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, 1270 BitVector &SavedVGPRs, 1271 RegScavenger *RS) const { 1272 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); 1273 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1274 if (MFI->isEntryFunction()) 1275 return; 1276 1277 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 1278 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1279 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1280 1281 // Ignore the SGPRs the default implementation found. 1282 SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask()); 1283 1284 // hasFP only knows about stack objects that already exist. We're now 1285 // determining the stack slots that will be created, so we have to predict 1286 // them. Stack objects force FP usage with calls. 1287 // 1288 // Note a new VGPR CSR may be introduced if one is used for the spill, but we 1289 // don't want to report it here. 1290 // 1291 // FIXME: Is this really hasReservedCallFrame? 1292 const bool WillHaveFP = 1293 FrameInfo.hasCalls() && 1294 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); 1295 1296 // VGPRs used for SGPR spilling need to be specially inserted in the prolog, 1297 // so don't allow the default insertion to handle them. 1298 for (auto SSpill : MFI->getSGPRSpillVGPRs()) 1299 SavedVGPRs.reset(SSpill.VGPR); 1300 1301 LivePhysRegs LiveRegs; 1302 LiveRegs.init(*TRI); 1303 1304 if (WillHaveFP || hasFP(MF)) { 1305 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, 1306 MFI->FramePointerSaveIndex, true); 1307 } 1308 1309 if (TRI->hasBasePointer(MF)) { 1310 if (MFI->SGPRForFPSaveRestoreCopy) 1311 LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); 1312 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, 1313 MFI->BasePointerSaveIndex, false); 1314 } 1315 } 1316 1317 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, 1318 BitVector &SavedRegs, 1319 RegScavenger *RS) const { 1320 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1321 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1322 if (MFI->isEntryFunction()) 1323 return; 1324 1325 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1326 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1327 1328 // The SP is specifically managed and we don't want extra spills of it. 1329 SavedRegs.reset(MFI->getStackPtrOffsetReg()); 1330 SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask()); 1331 } 1332 1333 bool SIFrameLowering::assignCalleeSavedSpillSlots( 1334 MachineFunction &MF, const TargetRegisterInfo *TRI, 1335 std::vector<CalleeSavedInfo> &CSI) const { 1336 if (CSI.empty()) 1337 return true; // Early exit if no callee saved registers are modified! 
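  // Returning false below keeps the default spill-slot assignment; this hook
  // only redirects the FP/BP entries to their SGPR save/restore copies.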
1338 1339 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1340 if (!FuncInfo->SGPRForFPSaveRestoreCopy && 1341 !FuncInfo->SGPRForBPSaveRestoreCopy) 1342 return false; 1343 1344 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1345 const SIRegisterInfo *RI = ST.getRegisterInfo(); 1346 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1347 Register BasePtrReg = RI->getBaseRegister(); 1348 unsigned NumModifiedRegs = 0; 1349 1350 if (FuncInfo->SGPRForFPSaveRestoreCopy) 1351 NumModifiedRegs++; 1352 if (FuncInfo->SGPRForBPSaveRestoreCopy) 1353 NumModifiedRegs++; 1354 1355 for (auto &CS : CSI) { 1356 if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { 1357 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1358 if (--NumModifiedRegs) 1359 break; 1360 } else if (CS.getReg() == BasePtrReg && 1361 FuncInfo->SGPRForBPSaveRestoreCopy) { 1362 CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1363 if (--NumModifiedRegs) 1364 break; 1365 } 1366 } 1367 1368 return false; 1369 } 1370 1371 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( 1372 MachineFunction &MF, 1373 MachineBasicBlock &MBB, 1374 MachineBasicBlock::iterator I) const { 1375 int64_t Amount = I->getOperand(0).getImm(); 1376 if (Amount == 0) 1377 return MBB.erase(I); 1378 1379 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1380 const SIInstrInfo *TII = ST.getInstrInfo(); 1381 const DebugLoc &DL = I->getDebugLoc(); 1382 unsigned Opc = I->getOpcode(); 1383 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 1384 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 1385 1386 if (!hasReservedCallFrame(MF)) { 1387 Amount = alignTo(Amount, getStackAlign()); 1388 assert(isUInt<32>(Amount) && "exceeded stack address space size"); 1389 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1390 Register SPReg = MFI->getStackPtrOffsetReg(); 1391 1392 unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 1393 BuildMI(MBB, I, DL, TII->get(Op), SPReg) 1394 .addReg(SPReg) 1395 .addImm(Amount * getScratchScaleFactor(ST)); 1396 } else if (CalleePopAmount != 0) { 1397 llvm_unreachable("is this used?"); 1398 } 1399 1400 return MBB.erase(I); 1401 } 1402 1403 /// Returns true if the frame will require a reference to the stack pointer. 1404 /// 1405 /// This is the set of conditions common to setting up the stack pointer in a 1406 /// kernel, and for using a frame pointer in a callable function. 1407 /// 1408 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm 1409 /// references SP. 1410 static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { 1411 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint(); 1412 } 1413 1414 // The FP for kernels is always known 0, so we never really need to setup an 1415 // explicit register for it. However, DisableFramePointerElim will force us to 1416 // use a register for it. 1417 bool SIFrameLowering::hasFP(const MachineFunction &MF) const { 1418 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1419 1420 // For entry functions we can use an immediate offset in most cases, so the 1421 // presence of calls doesn't imply we need a distinct frame pointer. 1422 if (MFI.hasCalls() && 1423 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { 1424 // All offsets are unsigned, so need to be addressed in the same direction 1425 // as stack growth. 
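    // Hence a function with calls keeps an FP whenever it has any stack at
    // all, which is what the check below implements.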
1426 1427 // FIXME: This function is pretty broken, since it can be called before the 1428 // frame layout is determined or CSR spills are inserted. 1429 return MFI.getStackSize() != 0; 1430 } 1431 1432 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() || 1433 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) || 1434 MF.getTarget().Options.DisableFramePointerElim(MF); 1435 } 1436 1437 // This is essentially a reduced version of hasFP for entry functions. Since the 1438 // stack pointer is known 0 on entry to kernels, we never really need an FP 1439 // register. We may need to initialize the stack pointer depending on the frame 1440 // properties, which logically overlaps many of the cases where an ordinary 1441 // function would require an FP. 1442 bool SIFrameLowering::requiresStackPointerReference( 1443 const MachineFunction &MF) const { 1444 // Callable functions always require a stack pointer reference. 1445 assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() && 1446 "only expected to call this for entry points"); 1447 1448 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1449 1450 // Entry points ordinarily don't need to initialize SP. We have to set it up 1451 // for callees if there are any. Also note tail calls are impossible/don't 1452 // make any sense for kernels. 1453 if (MFI.hasCalls()) 1454 return true; 1455 1456 // We still need to initialize the SP if we're doing anything weird that 1457 // references the SP, like variable sized stack objects. 1458 return frameTriviallyRequiresSP(MFI); 1459 } 1460