//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // When an unused register was requested, the callers can handle failure and
  // have a fallback plan. In other contexts, this must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to specially emit stack operations here because a different frame
// register is used than the one getFrameRegister would return for the rest of
// the function.
static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
          .addReg(SpillReg, RegState::Kill)
          .addReg(SPReg)
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // dlc
          .addMemOperand(MMO);
      return;
    }
  } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
        .addReg(SpillReg, RegState::Kill)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
    return;
  }

  // Don't clobber the TmpVGPR if we also need a scratch reg for the stack
  // offset in the spill.
  LiveRegs.addReg(SpillReg);

  if (ST.enableFlatScratch()) {
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
        .addReg(SPReg)
        .addImm(Offset);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
        .addReg(SpillReg, RegState::Kill)
        .addReg(OffsetReg, RegState::Kill)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // dlc
        .addMemOperand(MMO);
  } else {
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
        .addImm(Offset);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
        .addReg(SpillReg, RegState::Kill)
        .addReg(OffsetReg, RegState::Kill)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
  }

  LiveRegs.removeReg(SpillReg);
}

static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, Register SpillReg,
                              Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
      BuildMI(MBB, I, DebugLoc(),
              TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
          .addReg(SPReg)
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // dlc
          .addMemOperand(MMO);
      return;
    }
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
        .addReg(SPReg)
        .addImm(Offset);
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
            SpillReg)
        .addReg(OffsetReg, RegState::Kill)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // dlc
        .addMemOperand(MMO);
    return;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
      MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
      .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(0)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
}

static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
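    // Note: only bits [47:0] of the loaded descriptor form the scratch base
    // address; the high bits are masked off below before the pointer add.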
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // glc
        .addImm(0)             // dlc
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                          (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                          (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
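// The descriptor occupies an aligned 4-SGPR tuple. When the default reserved
// tuple at the top of the SGPR file is being used, try to move it down to the
// first unallocated tuple after the preloaded user/system SGPRs.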
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found overlaps the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
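  // The search below starts after the preloaded SGPRs and also avoids the
  // register holding the low half of the GIT pointer.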
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // glc
        .addImm(0)             // dlc
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // glc
            .addImm(0) // dlc
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Rsrc23 & 0xffffffff)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Rsrc23 >> 32)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
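  // The 64-bit update is done as s_add_u32 on sub0 followed by s_addc_u32 on
  // sub1 so the carry propagates into the high half of the base address.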
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  if (LiveRegs.empty()) {
    if (IsProlog) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      if (FuncInfo->SGPRForFPSaveRestoreCopy)
        LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);

      if (FuncInfo->SGPRForBPSaveRestoreCopy)
        LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    } else {
      // In epilog.
      LiveRegs.init(*ST.getRegisterInfo());
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());

  if (!IsProlog)
    LiveRegs.removeReg(ScratchExecCopy);

  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);

  return ScratchExecCopy;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
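  // buildScratchExecCopy saves the current exec mask and enables all lanes;
  // exec is restored from the copy once the spills below have been emitted.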
  Register ScratchExecCopy;

  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
  bool SpillFPToMemory = false;
  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
  // Otherwise we are spilling the FP to memory.
  if (HasFPSaveIndex) {
    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
  bool SpillBPToMemory = false;
  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
  // Otherwise we are spilling the BP to memory.
  if (HasBPSaveIndex) {
    SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (HasFPSaveIndex && SpillFPToMemory) {
    assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue()));

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
                     FuncInfo->FramePointerSaveIndex.getValue());
  }

  if (HasBPSaveIndex && SpillBPToMemory) {
    assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
                     *FuncInfo->BasePointerSaveIndex);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (HasFPSaveIndex && !SpillFPToMemory) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR;
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (HasBPSaveIndex && !SpillBPToMemory) {
    const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR;
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    }
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }

    // s_add_u32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
  bool SpillFPToMemory = false;
  if (HasFPSaveIndex) {
    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
  bool SpillBPToMemory = false;
  if (HasBPSaveIndex) {
    SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  Register ScratchExecCopy;
  if (HasFPSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI));
    if (SpillFPToMemory) {
      if (!ScratchExecCopy)
        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);

      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
                        FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TempVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (HasBPSaveIndex) {
    const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (SpillBPToMemory) {
      if (!ScratchExecCopy)
        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);

      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
                        FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TempVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);

    buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(), StackPtrReg,
                      Reg.FI.getValue());
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
          TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
          TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
          TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
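// SGPR callee saves are handled separately in determineCalleeSavesSGPR; here
// the SGPRs found by the generic implementation are cleared from the mask and
// only VGPR CSRs (minus those used for SGPR spilling) are kept.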
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

  // We have to anticipate introducing CSR VGPR spills if we don't have any
  // stack objects already, since we require an FP if there are calls and
  // stack objects.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
      !FuncInfo->SGPRForBPSaveRestoreCopy)
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  unsigned NumModifiedRegs = 0;

  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg &&
               FuncInfo->SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
        .addReg(SPReg)
        .addImm(Amount * getScratchScaleFactor(ST));
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known 0, so we never really need to setup an
// explicit register for it. However, DisableFramePointerElim will force us to
// use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since the
// stack pointer is known 0 on entry to kernels, we never really need an FP
// register. We may need to initialize the stack pointer depending on the frame
// properties, which logically overlaps many of the cases where an ordinary
// function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}