//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                 LivePhysRegs &LiveRegs,
                                                 const TargetRegisterClass &RC,
                                                 bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (unsigned Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (unsigned Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, this is only used in contexts where
  // failure is an option and the caller has an alternative plan. In other
  // contexts, this must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return AMDGPU::NoRegister;
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to specially emit stack operations here because a different frame
// register is used here than the one getFrameRegister would return for the
// rest of the function.
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, unsigned SpillReg,
                             unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

  // The immediate offset only fits in 12 unsigned bits; otherwise materialize
  // the offset in a VGPR and use the OFFEN form.
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
      MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, unsigned SpillReg,
                              unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
      MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScratchInitReg =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    //
    // FIXME: The preloaded SGPR count is not accurate for shaders as the
    // scratch wave offset may be in a fixed SGPR or
    // SITargetLowering::allocateSystemSGPRs may choose some free SGPR for the
    // scratch wave offset. We explicitly avoid the scratch wave offset to
    // account for this.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, ScratchWaveOffsetReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register ScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `AMDGPU::NoRegister` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg =
      getEntryFunctionReservedScratchRsrcReg(MF, ScratchWaveOffsetReg);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg != AMDGPU::NoRegister &&
        PreloadedScratchRsrcReg != AMDGPU::NoRegister) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  if (MF.getFrameInfo().hasCalls()) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
      .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg != AMDGPU::NoRegister) {
    MRI.addLiveIn(ScratchWaveOffsetReg);
    MBB.addLiveIn(ScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
      case CallingConv::AMDGPU_HS:
      case CallingConv::AMDGPU_GS:
        // Low GIT address is passed in s8 rather than s0 for an LS+HS or
        // ES+GS merged shader on gfx9+.
        GitPtrLo = AMDGPU::SGPR8;
        break;
      default:
        break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
  } else if (ST.isMesaGfxShader(Fn) ||
             (PreloadedScratchRsrcReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg != AMDGPU::NoRegister);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
        .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
    .addReg(ScratchRsrcSub0)
    .addReg(ScratchWaveOffsetReg)
    .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
    .addReg(ScratchRsrcSub1)
    .addImm(0)
    .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::SVEVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  unsigned ScratchExecCopy = AMDGPU::NoRegister;

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (ScratchExecCopy == AMDGPU::NoRegister) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR;
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister &&
           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);

    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    // Frame sizes are per lane, but the stack pointer holds a per-wave byte
    // offset into scratch, so scale the allocation by the wavefront size.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  unsigned ScratchExecCopy = AMDGPU::NoRegister;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (ScratchExecCopy == AMDGPU::NoRegister) {
      // See emitPrologue
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}
#endif

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;

  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;

  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    unsigned SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}