//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

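// Helpers returning the SGPR128 tuples / SGPR_32 registers this function may
// use, limited by the subtarget's SGPR budget for the function.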
static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                                       const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, we are in a context where failure is an
  // option and the caller has an alternative plan. In other contexts, this
  // must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to specially emit stack operations here because a different frame
// register is used here than in the rest of the function (i.e. than the one
// getFrameRegister would return).
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

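  // The MUBUF offset field is a 12-bit unsigned immediate, so offsets that fit
  // can be encoded directly; larger offsets are materialized in a VGPR below.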
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, Register SpillReg,
                              Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

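  // As in buildPrologSpill, offsets that fit in the 12-bit MUBUF immediate are
  // encoded directly; otherwise the offset is materialized in a scratch VGPR.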
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user-facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScratchInitReg =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
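      // On GFX10+, the flat scratch base is programmed via s_setreg writes to
      // the FLAT_SCR_LO/HI hardware registers rather than by writing the
      // FLAT_SCR register pair directly.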
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || !MRI.isPhysRegUsed(ScratchRsrcReg))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

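  // Round the preloaded SGPR count up to the next SGPR128 tuple boundary (4
  // registers) so the slice below skips every tuple they overlap.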
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (MF.getFrameInfo().hasCalls()) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
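    // The SP holds a per-wave byte offset into scratch, so the per-lane frame
    // size is scaled by the wavefront size here.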
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    Register GitPtrLo = MFI->getGITPtrLoReg(MF);
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::SVEVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
    // Make the register live throughout the function.
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
              ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR.
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
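    // Reserve an extra alignment's worth of space to cover the bytes consumed
    // by rounding the frame pointer up to the alignment below.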
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    Register ScratchSPReg = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);

    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (!ScratchExecCopy) {
      // See emitPrologue
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}
#endif

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
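  // If any real stack objects remain, reserve an emergency spill slot so the
  // register scavenger can free up a register during frame index elimination.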
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;

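  // An FP save is needed: prefer spilling it to a free VGPR lane, then copying
  // it to a free non-CSR SGPR, and as a last resort force a new SGPR-to-VGPR
  // spill (which may require spilling a CSR VGPR).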
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to  " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;

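  // Only the FP save is redirected to the reserved SGPR copy; returning false
  // afterwards lets the generic code assign spill slots for the remaining CSRs.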
  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

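    // As in the prologue, the SP adjustment is a per-wave byte amount, so the
    // per-lane Amount is scaled by the wavefront size.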
    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
    MFI.hasStackMap() || MFI.hasPatchPoint() ||
    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
    MF.getTarget().Options.DisableFramePointerElim(MF);
}