//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, we are in a context where failure is an
  // option and the caller has an alternative plan. In other contexts, this
  // must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
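//
// These helpers emit a single dword store/reload for a prologue/epilogue spill
// slot: if the frame object's offset fits the 12-bit immediate field they use
// the _OFFSET form, otherwise the offset is materialized into a scratch VGPR
// and the _OFFEN form is used.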
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, Register SpillReg,
                              Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.
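
  // The FLAT_SCRATCH_INIT preloaded SGPR pair read below is combined with the
  // per-wave scratch offset to initialize flat scratch addressing.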

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScratchInitReg =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    //
    // FIXME: The preloaded SGPR count is not accurate for shaders as the
    // scratch wave offset may be in a fixed SGPR or
    // SITargetLowering::allocateSystemSGPRs may choose some free SGPR for the
    // scratch wave offset. We explicitly avoid the scratch wave offset to
    // account for this.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, ScratchWaveOffsetReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register ScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `AMDGPU::NoRegister` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg =
      getEntryFunctionReservedScratchRsrcReg(MF, ScratchWaveOffsetReg);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
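  //
  // The preloaded SRSRC is only fetched for the HSA/Mesa ABIs below; the other
  // paths in emitEntryFunctionScratchRsrcRegSetup construct the descriptor
  // instead (from the GIT pointer on amdpal, or from relocations for Mesa
  // graphics shaders).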
  Register PreloadedScratchRsrcReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg != AMDGPU::NoRegister &&
        PreloadedScratchRsrcReg != AMDGPU::NoRegister) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  if (MF.getFrameInfo().hasCalls()) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
      .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg != AMDGPU::NoRegister) {
    MRI.addLiveIn(ScratchWaveOffsetReg);
    MBB.addLiveIn(ScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoRegister`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
      case CallingConv::AMDGPU_HS:
      case CallingConv::AMDGPU_GS:
        // Low GIT address is passed in s8 rather than s0 for an LS+HS or
        // ES+GS merged shader on gfx9+.
        GitPtrLo = AMDGPU::SGPR8;
        break;
      default:
        break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
  } else if (ST.isMesaGfxShader(Fn) ||
             (PreloadedScratchRsrcReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and set up the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg != AMDGPU::NoRegister);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
        .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
    .addReg(ScratchRsrcSub0)
    .addReg(ScratchWaveOffsetReg)
    .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
    .addReg(ScratchRsrcSub1)
    .addImm(0)
    .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::SVEVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
    // Make the register live throughout the function.
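    // (It must remain available until the epilogue copies it back into the
    // frame pointer register.)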
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
              ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }


  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR.
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    Register ScratchSPReg = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister &&
           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);

    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
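    //
    // At this point the frame pointer is simply a copy of the incoming stack
    // pointer; the stack allocation below then bumps SP past this function's
    // frame.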
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (ScratchExecCopy == AMDGPU::NoRegister) {
      // See emitPrologue
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
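    // Restore the exec mask that was live before forcing all lanes on for the
    // CSR VGPR reloads above.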
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}
#endif

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;

  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!
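
  // The frame pointer is the only register given special treatment here: when
  // it is saved by copying to a free SGPR rather than spilling to a VGPR lane,
  // redirect its CSI entry to that SGPR and let the generic spilling code
  // handle everything else (hence returning false below).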

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;

  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()
             ->needsStackRealignment(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}