1 //===----------------------- SIFrameLowering.cpp --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 9 #include "SIFrameLowering.h" 10 #include "AMDGPU.h" 11 #include "GCNSubtarget.h" 12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 13 #include "SIMachineFunctionInfo.h" 14 #include "llvm/CodeGen/LivePhysRegs.h" 15 #include "llvm/CodeGen/MachineFrameInfo.h" 16 #include "llvm/CodeGen/RegisterScavenging.h" 17 #include "llvm/Target/TargetMachine.h" 18 19 using namespace llvm; 20 21 #define DEBUG_TYPE "frame-info" 22 23 24 // Find a scratch register that we can use at the start of the prologue to 25 // re-align the stack pointer. We avoid using callee-save registers since they 26 // may appear to be free when this is called from canUseAsPrologue (during 27 // shrink wrapping), but then no longer be free when this is called from 28 // emitPrologue. 29 // 30 // FIXME: This is a bit conservative, since in the above case we could use one 31 // of the callee-save registers as a scratch temp to re-align the stack pointer, 32 // but we would then have to make sure that we were in fact saving at least one 33 // callee-save register in the prologue, which is additional complexity that 34 // doesn't seem worth the benefit. 35 static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, 36 LivePhysRegs &LiveRegs, 37 const TargetRegisterClass &RC, 38 bool Unused = false) { 39 // Mark callee saved registers as used so we will not choose them. 40 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 41 for (unsigned i = 0; CSRegs[i]; ++i) 42 LiveRegs.addReg(CSRegs[i]); 43 44 if (Unused) { 45 // We are looking for a register that can be used throughout the entire 46 // function, so any use is unacceptable. 47 for (MCRegister Reg : RC) { 48 if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) 49 return Reg; 50 } 51 } else { 52 for (MCRegister Reg : RC) { 53 if (LiveRegs.available(MRI, Reg)) 54 return Reg; 55 } 56 } 57 58 // If we require an unused register, this is used in contexts where failure is 59 // an option and has an alternative plan. In other contexts, this must 60 // succeed. 61 if (!Unused) 62 report_fatal_error("failed to find free scratch register"); 63 64 return MCRegister(); 65 } 66 67 static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, 68 LivePhysRegs &LiveRegs, 69 Register &TempSGPR, 70 Optional<int> &FrameIndex, 71 bool IsFP) { 72 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 73 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 74 75 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 76 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 77 78 // We need to save and restore the current FP/BP. 79 80 // 1: If there is already a VGPR with free lanes, use it. We 81 // may already have to pay the penalty for spilling a CSR VGPR. 82 if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) { 83 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, 84 TargetStackID::SGPRSpill); 85 86 if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) 87 llvm_unreachable("allocate SGPR spill should have worked"); 88 89 FrameIndex = NewFI; 90 91 LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); 92 dbgs() << "Spilling " << (IsFP ?
"FP" : "BP") << " to " 93 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane 94 << '\n'); 95 return; 96 } 97 98 // 2: Next, try to save the FP/BP in an unused SGPR. 99 TempSGPR = findScratchNonCalleeSaveRegister( 100 MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); 101 102 if (!TempSGPR) { 103 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, 104 TargetStackID::SGPRSpill); 105 106 if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { 107 // 3: There's no free lane to spill, and no free register to save FP/BP, 108 // so we're forced to spill another VGPR to use for the spill. 109 FrameIndex = NewFI; 110 111 LLVM_DEBUG( 112 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); 113 dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " 114 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); 115 } else { 116 // Remove dead <NewFI> index 117 MF.getFrameInfo().RemoveStackObject(NewFI); 118 // 4: If all else fails, spill the FP/BP to memory. 119 FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); 120 LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " 121 << (IsFP ? "FP" : "BP") << '\n'); 122 } 123 } else { 124 LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to " 125 << printReg(TempSGPR, TRI) << '\n'); 126 } 127 } 128 129 // We need to specially emit stack operations here because a different frame 130 // register is used than in the rest of the function, as getFrameRegister would 131 // use. 132 static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, 133 MachineBasicBlock &MBB, 134 MachineBasicBlock::iterator I, 135 const SIInstrInfo *TII, Register SpillReg, 136 Register ScratchRsrcReg, Register SPReg, int FI) { 137 MachineFunction *MF = MBB.getParent(); 138 MachineFrameInfo &MFI = MF->getFrameInfo(); 139 140 int64_t Offset = MFI.getObjectOffset(FI); 141 142 MachineMemOperand *MMO = MF->getMachineMemOperand( 143 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4, 144 MFI.getObjectAlign(FI)); 145 146 if (ST.enableFlatScratch()) { 147 if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 148 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR)) 149 .addReg(SpillReg, RegState::Kill) 150 .addReg(SPReg) 151 .addImm(Offset) 152 .addImm(0) // glc 153 .addImm(0) // slc 154 .addImm(0) // dlc 155 .addMemOperand(MMO); 156 return; 157 } 158 } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) { 159 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET)) 160 .addReg(SpillReg, RegState::Kill) 161 .addReg(ScratchRsrcReg) 162 .addReg(SPReg) 163 .addImm(Offset) 164 .addImm(0) // glc 165 .addImm(0) // slc 166 .addImm(0) // tfe 167 .addImm(0) // dlc 168 .addImm(0) // swz 169 .addMemOperand(MMO); 170 return; 171 } 172 173 // Don't clobber the TmpVGPR if we also need a scratch reg for the stack 174 // offset in the spill. 
175 LiveRegs.addReg(SpillReg); 176 177 if (ST.enableFlatScratch()) { 178 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 179 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 180 181 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 182 .addReg(SPReg) 183 .addImm(Offset); 184 185 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR)) 186 .addReg(SpillReg, RegState::Kill) 187 .addReg(OffsetReg, RegState::Kill) 188 .addImm(0) 189 .addImm(0) // glc 190 .addImm(0) // slc 191 .addImm(0) // dlc 192 .addMemOperand(MMO); 193 } else { 194 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 195 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 196 197 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 198 .addImm(Offset); 199 200 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN)) 201 .addReg(SpillReg, RegState::Kill) 202 .addReg(OffsetReg, RegState::Kill) 203 .addReg(ScratchRsrcReg) 204 .addReg(SPReg) 205 .addImm(0) 206 .addImm(0) // glc 207 .addImm(0) // slc 208 .addImm(0) // tfe 209 .addImm(0) // dlc 210 .addImm(0) // swz 211 .addMemOperand(MMO); 212 } 213 214 LiveRegs.removeReg(SpillReg); 215 } 216 217 static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, 218 MachineBasicBlock &MBB, 219 MachineBasicBlock::iterator I, 220 const SIInstrInfo *TII, Register SpillReg, 221 Register ScratchRsrcReg, Register SPReg, int FI) { 222 MachineFunction *MF = MBB.getParent(); 223 MachineFrameInfo &MFI = MF->getFrameInfo(); 224 int64_t Offset = MFI.getObjectOffset(FI); 225 226 MachineMemOperand *MMO = MF->getMachineMemOperand( 227 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4, 228 MFI.getObjectAlign(FI)); 229 230 if (ST.enableFlatScratch()) { 231 if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 232 BuildMI(MBB, I, DebugLoc(), 233 TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg) 234 .addReg(SPReg) 235 .addImm(Offset) 236 .addImm(0) // glc 237 .addImm(0) // slc 238 .addImm(0) // dlc 239 .addMemOperand(MMO); 240 return; 241 } 242 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 243 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 244 245 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 246 .addReg(SPReg) 247 .addImm(Offset); 248 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), 249 SpillReg) 250 .addReg(OffsetReg, RegState::Kill) 251 .addImm(0) 252 .addImm(0) // glc 253 .addImm(0) // slc 254 .addImm(0) // dlc 255 .addMemOperand(MMO); 256 return; 257 } 258 259 if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) { 260 BuildMI(MBB, I, DebugLoc(), 261 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg) 262 .addReg(ScratchRsrcReg) 263 .addReg(SPReg) 264 .addImm(Offset) 265 .addImm(0) // glc 266 .addImm(0) // slc 267 .addImm(0) // tfe 268 .addImm(0) // dlc 269 .addImm(0) // swz 270 .addMemOperand(MMO); 271 return; 272 } 273 274 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 275 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 276 277 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 278 .addImm(Offset); 279 280 BuildMI(MBB, I, DebugLoc(), 281 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg) 282 .addReg(OffsetReg, RegState::Kill) 283 .addReg(ScratchRsrcReg) 284 .addReg(SPReg) 285 .addImm(0) 286 .addImm(0) // glc 287 .addImm(0) // slc 288 .addImm(0) // tfe 289 .addImm(0) // dlc 290 .addImm(0) // swz 291 .addMemOperand(MMO); 292 } 293 294 static void 
buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 295 const DebugLoc &DL, const SIInstrInfo *TII, 296 Register TargetReg) { 297 MachineFunction *MF = MBB.getParent(); 298 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 299 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 300 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 301 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); 302 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); 303 304 if (MFI->getGITPtrHigh() != 0xffffffff) { 305 BuildMI(MBB, I, DL, SMovB32, TargetHi) 306 .addImm(MFI->getGITPtrHigh()) 307 .addReg(TargetReg, RegState::ImplicitDefine); 308 } else { 309 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); 310 BuildMI(MBB, I, DL, GetPC64, TargetReg); 311 } 312 Register GitPtrLo = MFI->getGITPtrLoReg(*MF); 313 MF->getRegInfo().addLiveIn(GitPtrLo); 314 MBB.addLiveIn(GitPtrLo); 315 BuildMI(MBB, I, DL, SMovB32, TargetLo) 316 .addReg(GitPtrLo); 317 } 318 319 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` 320 void SIFrameLowering::emitEntryFunctionFlatScratchInit( 321 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 322 const DebugLoc &DL, Register ScratchWaveOffsetReg) const { 323 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 324 const SIInstrInfo *TII = ST.getInstrInfo(); 325 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 326 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 327 328 // We don't need this if we only have spills since there is no user facing 329 // scratch. 330 331 // TODO: If we know we don't have flat instructions earlier, we can omit 332 // this from the input registers. 333 // 334 // TODO: We only need to know if we access scratch space through a flat 335 // pointer. Because we only detect if flat instructions are used at all, 336 // this will be used more often than necessary on VI. 337 338 Register FlatScrInitLo; 339 Register FlatScrInitHi; 340 341 if (ST.isAmdPalOS()) { 342 // Extract the scratch offset from the descriptor in the GIT 343 LivePhysRegs LiveRegs; 344 LiveRegs.init(*TRI); 345 LiveRegs.addLiveIns(MBB); 346 347 // Find unused reg to load flat scratch init into 348 MachineRegisterInfo &MRI = MF.getRegInfo(); 349 Register FlatScrInit = AMDGPU::NoRegister; 350 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); 351 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; 352 AllSGPR64s = AllSGPR64s.slice( 353 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); 354 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 355 for (MCPhysReg Reg : AllSGPR64s) { 356 if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) && 357 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 358 FlatScrInit = Reg; 359 break; 360 } 361 } 362 assert(FlatScrInit && "Failed to find free register for scratch init"); 363 364 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); 365 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); 366 367 buildGitPtr(MBB, I, DL, TII, FlatScrInit); 368 369 // We now have the GIT ptr - now get the scratch descriptor from the entry 370 // at offset 0 (or offset 16 for a compute shader). 
371 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 372 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 373 auto *MMO = MF.getMachineMemOperand( 374 PtrInfo, 375 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 376 MachineMemOperand::MODereferenceable, 377 8, Align(4)); 378 unsigned Offset = 379 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; 380 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 381 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 382 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) 383 .addReg(FlatScrInit) 384 .addImm(EncodedOffset) // offset 385 .addImm(0) // glc 386 .addImm(0) // dlc 387 .addMemOperand(MMO); 388 389 // Mask the offset in [47:0] of the descriptor 390 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); 391 BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) 392 .addReg(FlatScrInitHi) 393 .addImm(0xffff); 394 } else { 395 Register FlatScratchInitReg = 396 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); 397 assert(FlatScratchInitReg); 398 399 MachineRegisterInfo &MRI = MF.getRegInfo(); 400 MRI.addLiveIn(FlatScratchInitReg); 401 MBB.addLiveIn(FlatScratchInitReg); 402 403 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); 404 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); 405 } 406 407 // Do a 64-bit pointer add. 408 if (ST.flatScratchIsPointer()) { 409 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 410 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 411 .addReg(FlatScrInitLo) 412 .addReg(ScratchWaveOffsetReg); 413 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) 414 .addReg(FlatScrInitHi) 415 .addImm(0); 416 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 417 addReg(FlatScrInitLo). 418 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | 419 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 420 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 421 addReg(FlatScrInitHi). 422 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | 423 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 424 return; 425 } 426 427 // For GFX9. 428 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) 429 .addReg(FlatScrInitLo) 430 .addReg(ScratchWaveOffsetReg); 431 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) 432 .addReg(FlatScrInitHi) 433 .addImm(0); 434 435 return; 436 } 437 438 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9); 439 440 // Copy the size in bytes. 441 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) 442 .addReg(FlatScrInitHi, RegState::Kill); 443 444 // Add wave offset in bytes to private base offset. 445 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. 446 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 447 .addReg(FlatScrInitLo) 448 .addReg(ScratchWaveOffsetReg); 449 450 // Convert offset to 256-byte units. 451 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) 452 .addReg(FlatScrInitLo, RegState::Kill) 453 .addImm(8); 454 } 455 456 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not 457 // memory. They should have been removed by now. 458 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { 459 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 460 I != E; ++I) { 461 if (!MFI.isDeadObjectIndex(I)) 462 return false; 463 } 464 465 return true; 466 } 467 468 // Shift down registers reserved for the scratch RSRC. 
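// Returns the scratch RSRC register that will actually be used, or Register()
// when the scratch RSRC is unused and no setup is required.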
469 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( 470 MachineFunction &MF) const { 471 472 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 473 const SIInstrInfo *TII = ST.getInstrInfo(); 474 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 475 MachineRegisterInfo &MRI = MF.getRegInfo(); 476 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 477 478 assert(MFI->isEntryFunction()); 479 480 Register ScratchRsrcReg = MFI->getScratchRSrcReg(); 481 482 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && 483 allStackObjectsAreDead(MF.getFrameInfo()))) 484 return Register(); 485 486 if (ST.hasSGPRInitBug() || 487 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) 488 return ScratchRsrcReg; 489 490 // We reserved the last registers for this. Shift it down to the end of those 491 // which were actually used. 492 // 493 // FIXME: It might be safer to use a pseudoregister before replacement. 494 495 // FIXME: We should be able to eliminate unused input registers. We only 496 // cannot do this for the resources required for scratch access. For now we 497 // skip over user SGPRs and may leave unused holes. 498 499 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; 500 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); 501 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); 502 503 // Skip the last N reserved elements because they should have already been 504 // reserved for VCC etc. 505 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 506 for (MCPhysReg Reg : AllSGPR128s) { 507 // Pick the first unallocated one. Make sure we don't clobber the other 508 // reserved input we needed. Also for PAL, make sure we don't clobber 509 // the GIT pointer passed in SGPR0 or SGPR8. 510 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 511 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 512 MRI.replaceRegWith(ScratchRsrcReg, Reg); 513 MFI->setScratchRSrcReg(Reg); 514 return Reg; 515 } 516 } 517 518 return ScratchRsrcReg; 519 } 520 521 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) { 522 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize(); 523 } 524 525 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, 526 MachineBasicBlock &MBB) const { 527 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); 528 529 // FIXME: If we only have SGPR spills, we won't actually be using scratch 530 // memory since these spill to VGPRs. We should be cleaning up these unused 531 // SGPR spill frame indices somewhere. 532 533 // FIXME: We still have implicit uses on SGPR spill instructions in case they 534 // need to spill to vector memory. It's likely that will not happen, but at 535 // this point it appears we need the setup. This part of the prolog should be 536 // emitted after frame indices are eliminated. 537 538 // FIXME: Remove all of the isPhysRegUsed checks 539 540 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 541 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 542 const SIInstrInfo *TII = ST.getInstrInfo(); 543 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 544 MachineRegisterInfo &MRI = MF.getRegInfo(); 545 const Function &F = MF.getFunction(); 546 547 assert(MFI->isEntryFunction()); 548 549 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( 550 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); 551 // FIXME: Hack to not crash in situations which emitted an error. 
552 if (!PreloadedScratchWaveOffsetReg) 553 return; 554 555 // We need to do the replacement of the private segment buffer register even 556 // if there are no stack objects. There could be stores to undef or a 557 // constant without an associated object. 558 // 559 // This will return `Register()` in cases where there are no actual 560 // uses of the SRSRC. 561 Register ScratchRsrcReg; 562 if (!ST.enableFlatScratch()) 563 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF); 564 565 // Make the selected register live throughout the function. 566 if (ScratchRsrcReg) { 567 for (MachineBasicBlock &OtherBB : MF) { 568 if (&OtherBB != &MBB) { 569 OtherBB.addLiveIn(ScratchRsrcReg); 570 } 571 } 572 } 573 574 // Now that we have fixed the reserved SRSRC we need to locate the 575 // (potentially) preloaded SRSRC. 576 Register PreloadedScratchRsrcReg; 577 if (ST.isAmdHsaOrMesa(F)) { 578 PreloadedScratchRsrcReg = 579 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); 580 if (ScratchRsrcReg && PreloadedScratchRsrcReg) { 581 // We added live-ins during argument lowering, but since they were not 582 // used they were deleted. We're adding the uses now, so add them back. 583 MRI.addLiveIn(PreloadedScratchRsrcReg); 584 MBB.addLiveIn(PreloadedScratchRsrcReg); 585 } 586 } 587 588 // Debug location must be unknown since the first debug location is used to 589 // determine the end of the prologue. 590 DebugLoc DL; 591 MachineBasicBlock::iterator I = MBB.begin(); 592 593 // We found the SRSRC first because it needs four registers and has an 594 // alignment requirement. If the SRSRC that we found is clobbering with 595 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR 596 // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch 597 // wave offset to a free SGPR. 
598 Register ScratchWaveOffsetReg; 599 if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { 600 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); 601 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); 602 AllSGPRs = AllSGPRs.slice( 603 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded)); 604 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 605 for (MCPhysReg Reg : AllSGPRs) { 606 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 607 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { 608 ScratchWaveOffsetReg = Reg; 609 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) 610 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); 611 break; 612 } 613 } 614 } else { 615 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; 616 } 617 assert(ScratchWaveOffsetReg); 618 619 if (requiresStackPointerReference(MF)) { 620 Register SPReg = MFI->getStackPtrOffsetReg(); 621 assert(SPReg != AMDGPU::SP_REG); 622 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) 623 .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST)); 624 } 625 626 if (hasFP(MF)) { 627 Register FPReg = MFI->getFrameOffsetReg(); 628 assert(FPReg != AMDGPU::FP_REG); 629 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); 630 } 631 632 if (MFI->hasFlatScratchInit() || ScratchRsrcReg) { 633 MRI.addLiveIn(PreloadedScratchWaveOffsetReg); 634 MBB.addLiveIn(PreloadedScratchWaveOffsetReg); 635 } 636 637 if (MFI->hasFlatScratchInit()) { 638 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg); 639 } 640 641 if (ScratchRsrcReg) { 642 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL, 643 PreloadedScratchRsrcReg, 644 ScratchRsrcReg, ScratchWaveOffsetReg); 645 } 646 } 647 648 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg` 649 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup( 650 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 651 const DebugLoc &DL, Register PreloadedScratchRsrcReg, 652 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const { 653 654 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 655 const SIInstrInfo *TII = ST.getInstrInfo(); 656 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 657 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 658 const Function &Fn = MF.getFunction(); 659 660 if (ST.isAmdPalOS()) { 661 // The pointer to the GIT is formed from the offset passed in and either 662 // the amdgpu-git-ptr-high function attribute or the top part of the PC 663 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 664 665 buildGitPtr(MBB, I, DL, TII, Rsrc01); 666 667 // We now have the GIT ptr - now get the scratch descriptor from the entry 668 // at offset 0 (or offset 16 for a compute shader). 669 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 670 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); 671 auto MMO = MF.getMachineMemOperand(PtrInfo, 672 MachineMemOperand::MOLoad | 673 MachineMemOperand::MOInvariant | 674 MachineMemOperand::MODereferenceable, 675 16, Align(4)); 676 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 
16 : 0; 677 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 678 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 679 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) 680 .addReg(Rsrc01) 681 .addImm(EncodedOffset) // offset 682 .addImm(0) // glc 683 .addImm(0) // dlc 684 .addReg(ScratchRsrcReg, RegState::ImplicitDefine) 685 .addMemOperand(MMO); 686 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) { 687 assert(!ST.isAmdHsaOrMesa(Fn)); 688 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 689 690 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); 691 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); 692 693 // Use relocations to get the pointer, and setup the other bits manually. 694 uint64_t Rsrc23 = TII->getScratchRsrcWords23(); 695 696 if (MFI->hasImplicitBufferPtr()) { 697 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 698 699 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { 700 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); 701 702 BuildMI(MBB, I, DL, Mov64, Rsrc01) 703 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 704 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 705 } else { 706 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 707 708 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 709 auto MMO = MF.getMachineMemOperand( 710 PtrInfo, 711 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 712 MachineMemOperand::MODereferenceable, 713 8, Align(4)); 714 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) 715 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 716 .addImm(0) // offset 717 .addImm(0) // glc 718 .addImm(0) // dlc 719 .addMemOperand(MMO) 720 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 721 722 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 723 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 724 } 725 } else { 726 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 727 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 728 729 BuildMI(MBB, I, DL, SMovB32, Rsrc0) 730 .addExternalSymbol("SCRATCH_RSRC_DWORD0") 731 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 732 733 BuildMI(MBB, I, DL, SMovB32, Rsrc1) 734 .addExternalSymbol("SCRATCH_RSRC_DWORD1") 735 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 736 737 } 738 739 BuildMI(MBB, I, DL, SMovB32, Rsrc2) 740 .addImm(Rsrc23 & 0xffffffff) 741 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 742 743 BuildMI(MBB, I, DL, SMovB32, Rsrc3) 744 .addImm(Rsrc23 >> 32) 745 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 746 } else if (ST.isAmdHsaOrMesa(Fn)) { 747 assert(PreloadedScratchRsrcReg); 748 749 if (ScratchRsrcReg != PreloadedScratchRsrcReg) { 750 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) 751 .addReg(PreloadedScratchRsrcReg, RegState::Kill); 752 } 753 } 754 755 // Add the scratch wave offset into the scratch RSRC. 756 // 757 // We only want to update the first 48 bits, which is the base address 758 // pointer, without touching the adjacent 16 bits of flags. We know this add 759 // cannot carry-out from bit 47, otherwise the scratch allocation would be 760 // impossible to fit in the 48-bit global address space. 761 // 762 // TODO: Evaluate if it is better to just construct an SRD using the flat 763 // scratch init and some constants rather than update the one we are passed. 
764 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 765 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 766 767 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in 768 // the kernel body via inreg arguments. 769 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) 770 .addReg(ScratchRsrcSub0) 771 .addReg(ScratchWaveOffsetReg) 772 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 773 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) 774 .addReg(ScratchRsrcSub1) 775 .addImm(0) 776 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 777 } 778 779 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { 780 switch (ID) { 781 case TargetStackID::Default: 782 case TargetStackID::NoAlloc: 783 case TargetStackID::SGPRSpill: 784 return true; 785 case TargetStackID::ScalableVector: 786 return false; 787 } 788 llvm_unreachable("Invalid TargetStackID::Value"); 789 } 790 791 // Activate all lanes, returns saved exec. 792 static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, 793 MachineFunction &MF, 794 MachineBasicBlock &MBB, 795 MachineBasicBlock::iterator MBBI, 796 bool IsProlog) { 797 Register ScratchExecCopy; 798 MachineRegisterInfo &MRI = MF.getRegInfo(); 799 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 800 const SIInstrInfo *TII = ST.getInstrInfo(); 801 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 802 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 803 DebugLoc DL; 804 805 if (LiveRegs.empty()) { 806 if (IsProlog) { 807 LiveRegs.init(TRI); 808 LiveRegs.addLiveIns(MBB); 809 if (FuncInfo->SGPRForFPSaveRestoreCopy) 810 LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy); 811 812 if (FuncInfo->SGPRForBPSaveRestoreCopy) 813 LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy); 814 } else { 815 // In epilog. 816 LiveRegs.init(*ST.getRegisterInfo()); 817 LiveRegs.addLiveOuts(MBB); 818 LiveRegs.stepBackward(*MBBI); 819 } 820 } 821 822 ScratchExecCopy = findScratchNonCalleeSaveRegister( 823 MRI, LiveRegs, *TRI.getWaveMaskRegClass()); 824 825 if (!IsProlog) 826 LiveRegs.removeReg(ScratchExecCopy); 827 828 const unsigned OrSaveExec = 829 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; 830 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1); 831 832 return ScratchExecCopy; 833 } 834 835 void SIFrameLowering::emitPrologue(MachineFunction &MF, 836 MachineBasicBlock &MBB) const { 837 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 838 if (FuncInfo->isEntryFunction()) { 839 emitEntryFunctionPrologue(MF, MBB); 840 return; 841 } 842 843 const MachineFrameInfo &MFI = MF.getFrameInfo(); 844 MachineRegisterInfo &MRI = MF.getRegInfo(); 845 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 846 const SIInstrInfo *TII = ST.getInstrInfo(); 847 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 848 849 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 850 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 851 Register BasePtrReg = 852 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); 853 LivePhysRegs LiveRegs; 854 855 MachineBasicBlock::iterator MBBI = MBB.begin(); 856 DebugLoc DL; 857 858 bool HasFP = false; 859 bool HasBP = false; 860 uint32_t NumBytes = MFI.getStackSize(); 861 uint32_t RoundedSize = NumBytes; 862 // To avoid clobbering VGPRs in lanes that weren't active on function entry, 863 // turn on all lanes before doing the spill to memory. 
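  // buildScratchExecCopy saves the current exec mask into a free SGPR and sets
  // all lanes active; the saved mask is restored once the memory spills below
  // are done.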
864 Register ScratchExecCopy; 865 866 bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue(); 867 bool SpillFPToMemory = false; 868 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. 869 // Otherwise we are spilling the FP to memory. 870 if (HasFPSaveIndex) { 871 SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) != 872 TargetStackID::SGPRSpill; 873 } 874 875 bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue(); 876 bool SpillBPToMemory = false; 877 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. 878 // Otherwise we are spilling the BP to memory. 879 if (HasBPSaveIndex) { 880 SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) != 881 TargetStackID::SGPRSpill; 882 } 883 884 // Emit the copy if we need an FP, and are using a free SGPR to save it. 885 if (FuncInfo->SGPRForFPSaveRestoreCopy) { 886 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy) 887 .addReg(FramePtrReg) 888 .setMIFlag(MachineInstr::FrameSetup); 889 } 890 891 // Emit the copy if we need a BP, and are using a free SGPR to save it. 892 if (FuncInfo->SGPRForBPSaveRestoreCopy) { 893 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), 894 FuncInfo->SGPRForBPSaveRestoreCopy) 895 .addReg(BasePtrReg) 896 .setMIFlag(MachineInstr::FrameSetup); 897 } 898 899 // If a copy has been emitted for FP and/or BP, Make the SGPRs 900 // used in the copy instructions live throughout the function. 901 SmallVector<MCPhysReg, 2> TempSGPRs; 902 if (FuncInfo->SGPRForFPSaveRestoreCopy) 903 TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); 904 905 if (FuncInfo->SGPRForBPSaveRestoreCopy) 906 TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); 907 908 if (!TempSGPRs.empty()) { 909 for (MachineBasicBlock &MBB : MF) { 910 for (MCPhysReg Reg : TempSGPRs) 911 MBB.addLiveIn(Reg); 912 913 MBB.sortUniqueLiveIns(); 914 } 915 } 916 917 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg 918 : FuncInfo->getSGPRSpillVGPRs()) { 919 if (!Reg.FI.hasValue()) 920 continue; 921 922 if (!ScratchExecCopy) 923 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 924 925 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR, 926 FuncInfo->getScratchRSrcReg(), 927 StackPtrReg, 928 Reg.FI.getValue()); 929 } 930 931 if (HasFPSaveIndex && SpillFPToMemory) { 932 assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue())); 933 934 if (!ScratchExecCopy) 935 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 936 937 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 938 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 939 940 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 941 .addReg(FramePtrReg); 942 943 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR, 944 FuncInfo->getScratchRSrcReg(), StackPtrReg, 945 FuncInfo->FramePointerSaveIndex.getValue()); 946 } 947 948 if (HasBPSaveIndex && SpillBPToMemory) { 949 assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex)); 950 951 if (!ScratchExecCopy) 952 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true); 953 954 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 955 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 956 957 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 958 .addReg(BasePtrReg); 959 960 buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR, 961 FuncInfo->getScratchRSrcReg(), StackPtrReg, 962 *FuncInfo->BasePointerSaveIndex); 963 } 964 965 if (ScratchExecCopy) { 966 // FIXME: 
Split block and make terminator. 967 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 968 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 969 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 970 .addReg(ScratchExecCopy, RegState::Kill); 971 LiveRegs.addReg(ScratchExecCopy); 972 } 973 974 // In this case, spill the FP to a reserved VGPR. 975 if (HasFPSaveIndex && !SpillFPToMemory) { 976 const int FI = FuncInfo->FramePointerSaveIndex.getValue(); 977 assert(!MFI.isDeadObjectIndex(FI)); 978 979 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 980 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 981 FuncInfo->getSGPRToVGPRSpills(FI); 982 assert(Spill.size() == 1); 983 984 // Save FP before setting it up. 985 // FIXME: This should respect spillSGPRToVGPR; 986 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 987 .addReg(FramePtrReg) 988 .addImm(Spill[0].Lane) 989 .addReg(Spill[0].VGPR, RegState::Undef); 990 } 991 992 // In this case, spill the BP to a reserved VGPR. 993 if (HasBPSaveIndex && !SpillBPToMemory) { 994 const int BasePtrFI = *FuncInfo->BasePointerSaveIndex; 995 assert(!MFI.isDeadObjectIndex(BasePtrFI)); 996 997 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 998 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 999 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1000 assert(Spill.size() == 1); 1001 1002 // Save BP before setting it up. 1003 // FIXME: This should respect spillSGPRToVGPR; 1004 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 1005 .addReg(BasePtrReg) 1006 .addImm(Spill[0].Lane) 1007 .addReg(Spill[0].VGPR, RegState::Undef); 1008 } 1009 1010 if (TRI.needsStackRealignment(MF)) { 1011 HasFP = true; 1012 const unsigned Alignment = MFI.getMaxAlign().value(); 1013 1014 RoundedSize += Alignment; 1015 if (LiveRegs.empty()) { 1016 LiveRegs.init(TRI); 1017 LiveRegs.addLiveIns(MBB); 1018 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1019 LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1020 } 1021 1022 Register ScratchSPReg = findScratchNonCalleeSaveRegister( 1023 MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass); 1024 assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy && 1025 ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy); 1026 1027 // s_add_u32 tmp_reg, s32, NumBytes 1028 // s_and_b32 s32, tmp_reg, 0b111...0000 1029 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg) 1030 .addReg(StackPtrReg) 1031 .addImm((Alignment - 1) * getScratchScaleFactor(ST)) 1032 .setMIFlag(MachineInstr::FrameSetup); 1033 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) 1034 .addReg(ScratchSPReg, RegState::Kill) 1035 .addImm(-Alignment * getScratchScaleFactor(ST)) 1036 .setMIFlag(MachineInstr::FrameSetup); 1037 FuncInfo->setIsStackRealigned(true); 1038 } else if ((HasFP = hasFP(MF))) { 1039 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1040 .addReg(StackPtrReg) 1041 .setMIFlag(MachineInstr::FrameSetup); 1042 } 1043 1044 // If we need a base pointer, set it up here. It's whatever the value of 1045 // the stack pointer is at this point. Any variable size objects will be 1046 // allocated after this, so we can still use the base pointer to reference 1047 // the incoming arguments. 
1048 if ((HasBP = TRI.hasBasePointer(MF))) { 1049 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 1050 .addReg(StackPtrReg) 1051 .setMIFlag(MachineInstr::FrameSetup); 1052 } 1053 1054 if (HasFP && RoundedSize != 0) { 1055 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) 1056 .addReg(StackPtrReg) 1057 .addImm(RoundedSize * getScratchScaleFactor(ST)) 1058 .setMIFlag(MachineInstr::FrameSetup); 1059 } 1060 1061 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || 1062 FuncInfo->FramePointerSaveIndex)) && 1063 "Needed to save FP but didn't save it anywhere"); 1064 1065 assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && 1066 !FuncInfo->FramePointerSaveIndex)) && 1067 "Saved FP but didn't need it"); 1068 1069 assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || 1070 FuncInfo->BasePointerSaveIndex)) && 1071 "Needed to save BP but didn't save it anywhere"); 1072 1073 assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && 1074 !FuncInfo->BasePointerSaveIndex)) && 1075 "Saved BP but didn't need it"); 1076 } 1077 1078 void SIFrameLowering::emitEpilogue(MachineFunction &MF, 1079 MachineBasicBlock &MBB) const { 1080 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1081 if (FuncInfo->isEntryFunction()) 1082 return; 1083 1084 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1085 const SIInstrInfo *TII = ST.getInstrInfo(); 1086 MachineRegisterInfo &MRI = MF.getRegInfo(); 1087 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 1088 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1089 LivePhysRegs LiveRegs; 1090 DebugLoc DL; 1091 1092 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1093 uint32_t NumBytes = MFI.getStackSize(); 1094 uint32_t RoundedSize = FuncInfo->isStackRealigned() 1095 ? NumBytes + MFI.getMaxAlign().value() 1096 : NumBytes; 1097 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 1098 const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1099 const Register BasePtrReg = 1100 TRI.hasBasePointer(MF) ? 
TRI.getBaseRegister() : Register(); 1101 1102 bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue(); 1103 bool SpillFPToMemory = false; 1104 if (HasFPSaveIndex) { 1105 SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) != 1106 TargetStackID::SGPRSpill; 1107 } 1108 1109 bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue(); 1110 bool SpillBPToMemory = false; 1111 if (HasBPSaveIndex) { 1112 SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) != 1113 TargetStackID::SGPRSpill; 1114 } 1115 1116 if (RoundedSize != 0 && hasFP(MF)) { 1117 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) 1118 .addReg(StackPtrReg) 1119 .addImm(RoundedSize * getScratchScaleFactor(ST)) 1120 .setMIFlag(MachineInstr::FrameDestroy); 1121 } 1122 1123 if (FuncInfo->SGPRForFPSaveRestoreCopy) { 1124 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1125 .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) 1126 .setMIFlag(MachineInstr::FrameSetup); 1127 } 1128 1129 if (FuncInfo->SGPRForBPSaveRestoreCopy) { 1130 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 1131 .addReg(FuncInfo->SGPRForBPSaveRestoreCopy) 1132 .setMIFlag(MachineInstr::FrameSetup); 1133 } 1134 1135 Register ScratchExecCopy; 1136 if (HasFPSaveIndex) { 1137 const int FI = FuncInfo->FramePointerSaveIndex.getValue(); 1138 assert(!MFI.isDeadObjectIndex(FI)); 1139 if (SpillFPToMemory) { 1140 if (!ScratchExecCopy) 1141 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1142 1143 MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister( 1144 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 1145 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR, 1146 FuncInfo->getScratchRSrcReg(), StackPtrReg, FI); 1147 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) 1148 .addReg(TempVGPR, RegState::Kill); 1149 } else { 1150 // Reload from VGPR spill. 1151 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 1152 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1153 FuncInfo->getSGPRToVGPRSpills(FI); 1154 assert(Spill.size() == 1); 1155 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) 1156 .addReg(Spill[0].VGPR) 1157 .addImm(Spill[0].Lane); 1158 } 1159 } 1160 1161 if (HasBPSaveIndex) { 1162 const int BasePtrFI = *FuncInfo->BasePointerSaveIndex; 1163 assert(!MFI.isDeadObjectIndex(BasePtrFI)); 1164 if (SpillBPToMemory) { 1165 if (!ScratchExecCopy) 1166 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1167 1168 MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister( 1169 MRI, LiveRegs, AMDGPU::VGPR_32RegClass); 1170 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR, 1171 FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI); 1172 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) 1173 .addReg(TempVGPR, RegState::Kill); 1174 } else { 1175 // Reload from VGPR spill. 
1176 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 1177 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1178 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1179 assert(Spill.size() == 1); 1180 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) 1181 .addReg(Spill[0].VGPR) 1182 .addImm(Spill[0].Lane); 1183 } 1184 } 1185 1186 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg : 1187 FuncInfo->getSGPRSpillVGPRs()) { 1188 if (!Reg.FI.hasValue()) 1189 continue; 1190 1191 if (!ScratchExecCopy) 1192 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1193 1194 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR, 1195 FuncInfo->getScratchRSrcReg(), StackPtrReg, 1196 Reg.FI.getValue()); 1197 } 1198 1199 if (ScratchExecCopy) { 1200 // FIXME: Split block and make terminator. 1201 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 1202 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 1203 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 1204 .addReg(ScratchExecCopy, RegState::Kill); 1205 } 1206 } 1207 1208 #ifndef NDEBUG 1209 static bool allSGPRSpillsAreDead(const MachineFunction &MF) { 1210 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1211 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1212 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 1213 I != E; ++I) { 1214 if (!MFI.isDeadObjectIndex(I) && 1215 MFI.getStackID(I) == TargetStackID::SGPRSpill && 1216 (I != FuncInfo->FramePointerSaveIndex && 1217 I != FuncInfo->BasePointerSaveIndex)) { 1218 return false; 1219 } 1220 } 1221 1222 return true; 1223 } 1224 #endif 1225 1226 StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, 1227 int FI, 1228 Register &FrameReg) const { 1229 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 1230 1231 FrameReg = RI->getFrameRegister(MF); 1232 return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); 1233 } 1234 1235 void SIFrameLowering::processFunctionBeforeFrameFinalized( 1236 MachineFunction &MF, 1237 RegScavenger *RS) const { 1238 MachineFrameInfo &MFI = MF.getFrameInfo(); 1239 1240 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1241 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1242 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1243 1244 FuncInfo->removeDeadFrameIndices(MFI); 1245 assert(allSGPRSpillsAreDead(MF) && 1246 "SGPR spill should have been removed in SILowerSGPRSpills"); 1247 1248 // FIXME: The other checks should be redundant with allStackObjectsAreDead, 1249 // but currently hasNonSpillStackObjects is set only from source 1250 // allocas. Stack temps produced from legalization are not counted currently. 1251 if (!allStackObjectsAreDead(MFI)) { 1252 assert(RS && "RegScavenger required if spilling"); 1253 1254 if (FuncInfo->isEntryFunction()) { 1255 int ScavengeFI = MFI.CreateFixedObject( 1256 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); 1257 RS->addScavengingFrameIndex(ScavengeFI); 1258 } else { 1259 int ScavengeFI = MFI.CreateStackObject( 1260 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 1261 TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false); 1262 RS->addScavengingFrameIndex(ScavengeFI); 1263 } 1264 } 1265 } 1266 1267 // Only report VGPRs to generic code. 
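// SGPR callee saves are handled separately in determineCalleeSavesSGPR below.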
1268 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, 1269 BitVector &SavedVGPRs, 1270 RegScavenger *RS) const { 1271 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); 1272 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1273 if (MFI->isEntryFunction()) 1274 return; 1275 1276 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 1277 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1278 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1279 1280 // Ignore the SGPRs the default implementation found. 1281 SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask()); 1282 1283 // hasFP only knows about stack objects that already exist. We're now 1284 // determining the stack slots that will be created, so we have to predict 1285 // them. Stack objects force FP usage with calls. 1286 // 1287 // Note a new VGPR CSR may be introduced if one is used for the spill, but we 1288 // don't want to report it here. 1289 // 1290 // FIXME: Is this really hasReservedCallFrame? 1291 const bool WillHaveFP = 1292 FrameInfo.hasCalls() && 1293 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); 1294 1295 // VGPRs used for SGPR spilling need to be specially inserted in the prolog, 1296 // so don't allow the default insertion to handle them. 1297 for (auto SSpill : MFI->getSGPRSpillVGPRs()) 1298 SavedVGPRs.reset(SSpill.VGPR); 1299 1300 LivePhysRegs LiveRegs; 1301 LiveRegs.init(*TRI); 1302 1303 if (WillHaveFP || hasFP(MF)) { 1304 assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex && 1305 "Re-reserving spill slot for FP"); 1306 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, 1307 MFI->FramePointerSaveIndex, true); 1308 } 1309 1310 if (TRI->hasBasePointer(MF)) { 1311 if (MFI->SGPRForFPSaveRestoreCopy) 1312 LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); 1313 1314 assert(!MFI->SGPRForBPSaveRestoreCopy && 1315 !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP"); 1316 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, 1317 MFI->BasePointerSaveIndex, false); 1318 } 1319 } 1320 1321 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, 1322 BitVector &SavedRegs, 1323 RegScavenger *RS) const { 1324 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1325 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1326 if (MFI->isEntryFunction()) 1327 return; 1328 1329 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1330 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1331 1332 // The SP is specifically managed and we don't want extra spills of it. 1333 SavedRegs.reset(MFI->getStackPtrOffsetReg()); 1334 1335 const BitVector AllSavedRegs = SavedRegs; 1336 SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask()); 1337 1338 // If clearing VGPRs changed the mask, we will have some CSR VGPR spills. 1339 const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs; 1340 1341 // We have to anticipate introducing CSR VGPR spills if we don't have any 1342 // stack objects already, since we require an FP if there is a call and stack. 1343 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 1344 const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR; 1345 1346 // FP will be specially managed like SP. 
1347 if (WillHaveFP || hasFP(MF)) 1348 SavedRegs.reset(MFI->getFrameOffsetReg()); 1349 } 1350 1351 bool SIFrameLowering::assignCalleeSavedSpillSlots( 1352 MachineFunction &MF, const TargetRegisterInfo *TRI, 1353 std::vector<CalleeSavedInfo> &CSI) const { 1354 if (CSI.empty()) 1355 return true; // Early exit if no callee saved registers are modified! 1356 1357 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1358 if (!FuncInfo->SGPRForFPSaveRestoreCopy && 1359 !FuncInfo->SGPRForBPSaveRestoreCopy) 1360 return false; 1361 1362 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1363 const SIRegisterInfo *RI = ST.getRegisterInfo(); 1364 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1365 Register BasePtrReg = RI->getBaseRegister(); 1366 unsigned NumModifiedRegs = 0; 1367 1368 if (FuncInfo->SGPRForFPSaveRestoreCopy) 1369 NumModifiedRegs++; 1370 if (FuncInfo->SGPRForBPSaveRestoreCopy) 1371 NumModifiedRegs++; 1372 1373 for (auto &CS : CSI) { 1374 if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { 1375 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1376 if (--NumModifiedRegs) 1377 break; 1378 } else if (CS.getReg() == BasePtrReg && 1379 FuncInfo->SGPRForBPSaveRestoreCopy) { 1380 CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1381 if (--NumModifiedRegs) 1382 break; 1383 } 1384 } 1385 1386 return false; 1387 } 1388 1389 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( 1390 MachineFunction &MF, 1391 MachineBasicBlock &MBB, 1392 MachineBasicBlock::iterator I) const { 1393 int64_t Amount = I->getOperand(0).getImm(); 1394 if (Amount == 0) 1395 return MBB.erase(I); 1396 1397 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1398 const SIInstrInfo *TII = ST.getInstrInfo(); 1399 const DebugLoc &DL = I->getDebugLoc(); 1400 unsigned Opc = I->getOpcode(); 1401 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 1402 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 1403 1404 if (!hasReservedCallFrame(MF)) { 1405 Amount = alignTo(Amount, getStackAlign()); 1406 assert(isUInt<32>(Amount) && "exceeded stack address space size"); 1407 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1408 Register SPReg = MFI->getStackPtrOffsetReg(); 1409 1410 unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 1411 BuildMI(MBB, I, DL, TII->get(Op), SPReg) 1412 .addReg(SPReg) 1413 .addImm(Amount * getScratchScaleFactor(ST)); 1414 } else if (CalleePopAmount != 0) { 1415 llvm_unreachable("is this used?"); 1416 } 1417 1418 return MBB.erase(I); 1419 } 1420 1421 /// Returns true if the frame will require a reference to the stack pointer. 1422 /// 1423 /// This is the set of conditions common to setting up the stack pointer in a 1424 /// kernel, and for using a frame pointer in a callable function. 1425 /// 1426 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm 1427 /// references SP. 1428 static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { 1429 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint(); 1430 } 1431 1432 // The FP for kernels is always known 0, so we never really need to setup an 1433 // explicit register for it. However, DisableFramePointerElim will force us to 1434 // use a register for it. 
1435 bool SIFrameLowering::hasFP(const MachineFunction &MF) const { 1436 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1437 1438 // For entry functions we can use an immediate offset in most cases, so the 1439 // presence of calls doesn't imply we need a distinct frame pointer. 1440 if (MFI.hasCalls() && 1441 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { 1442 // All offsets are unsigned, so need to be addressed in the same direction 1443 // as stack growth. 1444 1445 // FIXME: This function is pretty broken, since it can be called before the 1446 // frame layout is determined or CSR spills are inserted. 1447 return MFI.getStackSize() != 0; 1448 } 1449 1450 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() || 1451 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) || 1452 MF.getTarget().Options.DisableFramePointerElim(MF); 1453 } 1454 1455 // This is essentially a reduced version of hasFP for entry functions. Since the 1456 // stack pointer is known 0 on entry to kernels, we never really need an FP 1457 // register. We may need to initialize the stack pointer depending on the frame 1458 // properties, which logically overlaps many of the cases where an ordinary 1459 // function would require an FP. 1460 bool SIFrameLowering::requiresStackPointerReference( 1461 const MachineFunction &MF) const { 1462 // Callable functions always require a stack pointer reference. 1463 assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() && 1464 "only expected to call this for entry points"); 1465 1466 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1467 1468 // Entry points ordinarily don't need to initialize SP. We have to set it up 1469 // for callees if there are any. Also note tail calls are impossible/don't 1470 // make any sense for kernels. 1471 if (MFI.hasCalls()) 1472 return true; 1473 1474 // We still need to initialize the SP if we're doing anything weird that 1475 // references the SP, like variable sized stack objects. 1476 return frameTriviallyRequiresSP(MFI); 1477 } 1478