1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPUSubtarget.h"
11 #include "SIInstrInfo.h"
12 #include "SIMachineFunctionInfo.h"
13 #include "SIRegisterInfo.h"
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 
16 #include "llvm/CodeGen/LivePhysRegs.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/RegisterScavenging.h"
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "frame-info"
25 
26 
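// Return the SGPR_128 registers that fit within this function's SGPR budget;
// each SGPR_128 covers four consecutive SGPRs, hence the division by 4.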
27 static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
28                                          const MachineFunction &MF) {
29   return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
30                       ST.getMaxNumSGPRs(MF) / 4);
31 }
32 
33 // Find a scratch register that we can use at the start of the prologue to
34 // re-align the stack pointer. We avoid using callee-save registers since they
35 // may appear to be free when this is called from canUseAsPrologue (during
36 // shrink wrapping), but then no longer be free when this is called from
37 // emitPrologue.
38 //
39 // FIXME: This is a bit conservative, since in the above case we could use one
40 // of the callee-save registers as a scratch temp to re-align the stack pointer,
41 // but we would then have to make sure that we were in fact saving at least one
42 // callee-save register in the prologue, which is additional complexity that
43 // doesn't seem worth the benefit.
44 static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
45                                                  LivePhysRegs &LiveRegs,
46                                                  const TargetRegisterClass &RC,
47                                                  bool Unused = false) {
48   // Mark callee saved registers as used so we will not choose them.
49   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
50   for (unsigned i = 0; CSRegs[i]; ++i)
51     LiveRegs.addReg(CSRegs[i]);
52 
53   if (Unused) {
54     // We are looking for a register that can be used throughout the entire
55     // function, so any use is unacceptable.
56     for (unsigned Reg : RC) {
57       if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
58         return Reg;
59     }
60   } else {
61     for (unsigned Reg : RC) {
62       if (LiveRegs.available(MRI, Reg))
63         return Reg;
64     }
65   }
66 
  // If we require an unused register, the caller can tolerate failure and has
  // an alternative plan. In all other contexts, this must succeed.
70   if (!Unused)
71     report_fatal_error("failed to find free scratch register");
72 
73   return AMDGPU::NoRegister;
74 }
75 
76 static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
77   LivePhysRegs LiveRegs;
78   LiveRegs.init(*MRI.getTargetRegisterInfo());
79   return findScratchNonCalleeSaveRegister(
80     MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
81 }
82 
// We need to emit these stack operations specially because a different frame
// register is used here than the one getFrameRegister would return for the
// rest of the function.
86 static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
87                              MachineBasicBlock::iterator I,
88                              const SIInstrInfo *TII, unsigned SpillReg,
89                              unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
90   MachineFunction *MF = MBB.getParent();
91   MachineFrameInfo &MFI = MF->getFrameInfo();
92 
93   int64_t Offset = MFI.getObjectOffset(FI);
94 
95   MachineMemOperand *MMO = MF->getMachineMemOperand(
96       MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
97       MFI.getObjectAlign(FI));
98 
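  // The MUBUF offset field is a 12-bit unsigned immediate, so small offsets
  // can be folded directly into the store; larger offsets are materialized in
  // a scratch VGPR below.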
99   if (isUInt<12>(Offset)) {
100     BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
101       .addReg(SpillReg, RegState::Kill)
102       .addReg(ScratchRsrcReg)
103       .addReg(SPReg)
104       .addImm(Offset)
105       .addImm(0) // glc
106       .addImm(0) // slc
107       .addImm(0) // tfe
108       .addImm(0) // dlc
109       .addImm(0) // swz
110       .addMemOperand(MMO);
111     return;
112   }
113 
114   MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
115     MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
116 
117   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
118     .addImm(Offset);
119 
120   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
121     .addReg(SpillReg, RegState::Kill)
122     .addReg(OffsetReg, RegState::Kill)
123     .addReg(ScratchRsrcReg)
124     .addReg(SPReg)
125     .addImm(0)
126     .addImm(0) // glc
127     .addImm(0) // slc
128     .addImm(0) // tfe
129     .addImm(0) // dlc
130     .addImm(0) // swz
131     .addMemOperand(MMO);
132 }
133 
134 static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
135                               MachineBasicBlock::iterator I,
136                               const SIInstrInfo *TII, unsigned SpillReg,
137                               unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
138   MachineFunction *MF = MBB.getParent();
139   MachineFrameInfo &MFI = MF->getFrameInfo();
140   int64_t Offset = MFI.getObjectOffset(FI);
141 
142   MachineMemOperand *MMO = MF->getMachineMemOperand(
143       MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
144       MFI.getObjectAlign(FI));
145 
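  // As in buildPrologSpill, fold the offset into the load when it fits the
  // 12-bit MUBUF immediate; otherwise materialize it in a scratch VGPR.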
146   if (isUInt<12>(Offset)) {
147     BuildMI(MBB, I, DebugLoc(),
148             TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
149       .addReg(ScratchRsrcReg)
150       .addReg(SPReg)
151       .addImm(Offset)
152       .addImm(0) // glc
153       .addImm(0) // slc
154       .addImm(0) // tfe
155       .addImm(0) // dlc
156       .addImm(0) // swz
157       .addMemOperand(MMO);
158     return;
159   }
160 
161   MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
162     MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
163 
164   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
165     .addImm(Offset);
166 
167   BuildMI(MBB, I, DebugLoc(),
168           TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
169     .addReg(OffsetReg, RegState::Kill)
170     .addReg(ScratchRsrcReg)
171     .addReg(SPReg)
172     .addImm(0)
173     .addImm(0) // glc
174     .addImm(0) // slc
175     .addImm(0) // tfe
176     .addImm(0) // dlc
177     .addImm(0) // swz
178     .addMemOperand(MMO);
179 }
180 
181 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
182 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
183     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
184     const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
185   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
186   const SIInstrInfo *TII = ST.getInstrInfo();
187   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
188   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
189 
190   // We don't need this if we only have spills since there is no user facing
191   // scratch.
192 
193   // TODO: If we know we don't have flat instructions earlier, we can omit
194   // this from the input registers.
195   //
196   // TODO: We only need to know if we access scratch space through a flat
197   // pointer. Because we only detect if flat instructions are used at all,
198   // this will be used more often than necessary on VI.
199 
200   Register FlatScratchInitReg =
201       MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
202 
203   MachineRegisterInfo &MRI = MF.getRegInfo();
204   MRI.addLiveIn(FlatScratchInitReg);
205   MBB.addLiveIn(FlatScratchInitReg);
206 
207   Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
208   Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
209 
210   // Do a 64-bit pointer add.
211   if (ST.flatScratchIsPointer()) {
212     if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
213       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
214         .addReg(FlatScrInitLo)
215         .addReg(ScratchWaveOffsetReg);
216       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
217         .addReg(FlatScrInitHi)
218         .addImm(0);
219       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
220         addReg(FlatScrInitLo).
221         addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
222                        (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
223       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
224         addReg(FlatScrInitHi).
225         addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
226                        (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
227       return;
228     }
229 
230     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
231       .addReg(FlatScrInitLo)
232       .addReg(ScratchWaveOffsetReg);
233     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
234       .addReg(FlatScrInitHi)
235       .addImm(0);
236 
237     return;
238   }
239 
240   assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
241 
242   // Copy the size in bytes.
243   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
244     .addReg(FlatScrInitHi, RegState::Kill);
245 
246   // Add wave offset in bytes to private base offset.
247   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
248   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
249     .addReg(FlatScrInitLo)
250     .addReg(ScratchWaveOffsetReg);
251 
252   // Convert offset to 256-byte units.
253   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
254     .addReg(FlatScrInitLo, RegState::Kill)
255     .addImm(8);
256 }
257 
258 // Shift down registers reserved for the scratch RSRC.
259 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
260     MachineFunction &MF, Register ScratchWaveOffsetReg) const {
261 
262   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
263   const SIInstrInfo *TII = ST.getInstrInfo();
264   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
265   MachineRegisterInfo &MRI = MF.getRegInfo();
266   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
267 
268   assert(MFI->isEntryFunction());
269 
270   Register ScratchRsrcReg = MFI->getScratchRSrcReg();
271 
272   if (ScratchRsrcReg == AMDGPU::NoRegister ||
273       !MRI.isPhysRegUsed(ScratchRsrcReg))
274     return AMDGPU::NoRegister;
275 
276   if (ST.hasSGPRInitBug() ||
277       ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
278     return ScratchRsrcReg;
279 
280   // We reserved the last registers for this. Shift it down to the end of those
281   // which were actually used.
282   //
283   // FIXME: It might be safer to use a pseudoregister before replacement.
284 
  // FIXME: We should be able to eliminate unused input registers. The only
  // ones we cannot eliminate are the resources required for scratch access.
  // For now we skip over user SGPRs and may leave unused holes.
288 
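  // Round the preloaded SGPR count up to a whole number of SGPR_128 tuples so
  // the shifted resource register cannot overlap any preloaded input.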
289   unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
290   ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
292 
293   // Skip the last N reserved elements because they should have already been
294   // reserved for VCC etc.
295   for (MCPhysReg Reg : AllSGPR128s) {
296     // Pick the first unallocated one. Make sure we don't clobber the other
297     // reserved input we needed.
298     //
299     // FIXME: The preloaded SGPR count is not accurate for shaders as the
300     // scratch wave offset may be in a fixed SGPR or
301     // SITargetLowering::allocateSystemSGPRs may choose some free SGPR for the
302     // scratch wave offset. We explicitly avoid the scratch wave offset to
303     // account for this.
304     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
305         !TRI->isSubRegisterEq(Reg, ScratchWaveOffsetReg)) {
306       MRI.replaceRegWith(ScratchRsrcReg, Reg);
307       MFI->setScratchRSrcReg(Reg);
308       return Reg;
309     }
310   }
311 
312   return ScratchRsrcReg;
313 }
314 
315 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
316                                                 MachineBasicBlock &MBB) const {
317   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
318 
319   // FIXME: If we only have SGPR spills, we won't actually be using scratch
320   // memory since these spill to VGPRs. We should be cleaning up these unused
321   // SGPR spill frame indices somewhere.
322 
323   // FIXME: We still have implicit uses on SGPR spill instructions in case they
324   // need to spill to vector memory. It's likely that will not happen, but at
325   // this point it appears we need the setup. This part of the prolog should be
326   // emitted after frame indices are eliminated.
327 
328   // FIXME: Remove all of the isPhysRegUsed checks
329 
330   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
331   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
332   const SIInstrInfo *TII = ST.getInstrInfo();
333   MachineRegisterInfo &MRI = MF.getRegInfo();
334   const Function &F = MF.getFunction();
335 
336   assert(MFI->isEntryFunction());
337 
338   Register ScratchWaveOffsetReg = MFI->getPreloadedReg(
339       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations that have already emitted an error.
341   if (ScratchWaveOffsetReg == AMDGPU::NoRegister)
342     return;
343 
344   // We need to do the replacement of the private segment buffer register even
345   // if there are no stack objects. There could be stores to undef or a
346   // constant without an associated object.
347   //
348   // This will return `AMDGPU::NoRegister` in cases where there are no actual
349   // uses of the SRSRC.
350   Register ScratchRsrcReg =
351       getEntryFunctionReservedScratchRsrcReg(MF, ScratchWaveOffsetReg);
352 
353   // Make the selected register live throughout the function.
354   if (ScratchRsrcReg != AMDGPU::NoRegister) {
355     for (MachineBasicBlock &OtherBB : MF) {
356       if (&OtherBB != &MBB) {
357         OtherBB.addLiveIn(ScratchRsrcReg);
358       }
359     }
360   }
361 
362   // Now that we have fixed the reserved SRSRC we need to locate the
363   // (potentially) preloaded SRSRC.
364   Register PreloadedScratchRsrcReg = AMDGPU::NoRegister;
365   if (ST.isAmdHsaOrMesa(F)) {
366     PreloadedScratchRsrcReg =
367         MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
368     if (ScratchRsrcReg != AMDGPU::NoRegister &&
369         PreloadedScratchRsrcReg != AMDGPU::NoRegister) {
370       // We added live-ins during argument lowering, but since they were not
371       // used they were deleted. We're adding the uses now, so add them back.
372       MRI.addLiveIn(PreloadedScratchRsrcReg);
373       MBB.addLiveIn(PreloadedScratchRsrcReg);
374     }
375   }
376 
377   // Debug location must be unknown since the first debug location is used to
378   // determine the end of the prologue.
379   DebugLoc DL;
380   MachineBasicBlock::iterator I = MBB.begin();
381 
382   if (MF.getFrameInfo().hasCalls()) {
383     Register SPReg = MFI->getStackPtrOffsetReg();
384     assert(SPReg != AMDGPU::SP_REG);
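    // The frame size is a per-lane byte count, while the SP holds a swizzled
    // per-wave scratch offset, so scale by the wavefront size.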
385     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
386         .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
387   }
388 
389   if (hasFP(MF)) {
390     Register FPReg = MFI->getFrameOffsetReg();
391     assert(FPReg != AMDGPU::FP_REG);
392     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
393   }
394 
395   if (MFI->hasFlatScratchInit() || ScratchRsrcReg != AMDGPU::NoRegister) {
396     MRI.addLiveIn(ScratchWaveOffsetReg);
397     MBB.addLiveIn(ScratchWaveOffsetReg);
398   }
399 
400   if (MFI->hasFlatScratchInit()) {
401     emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
402   }
403 
404   if (ScratchRsrcReg != AMDGPU::NoRegister) {
405     emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
406                                          PreloadedScratchRsrcReg,
407                                          ScratchRsrcReg, ScratchWaveOffsetReg);
408   }
409 }
410 
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg` is not
// `AMDGPU::NoRegister`.
412 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
413     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
414     const DebugLoc &DL, Register PreloadedScratchRsrcReg,
415     Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
416 
417   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
418   const SIInstrInfo *TII = ST.getInstrInfo();
419   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
420   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
421   const Function &Fn = MF.getFunction();
422 
423   if (ST.isAmdPalOS()) {
424     // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
426     Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
427     Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
428     Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
429 
430     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
431 
432     if (MFI->getGITPtrHigh() != 0xffffffff) {
433       BuildMI(MBB, I, DL, SMovB32, RsrcHi)
434         .addImm(MFI->getGITPtrHigh())
435         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
436     } else {
437       const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
438       BuildMI(MBB, I, DL, GetPC64, Rsrc01);
439     }
440     auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
441     if (ST.hasMergedShaders()) {
442       switch (MF.getFunction().getCallingConv()) {
443         case CallingConv::AMDGPU_HS:
444         case CallingConv::AMDGPU_GS:
445           // Low GIT address is passed in s8 rather than s0 for an LS+HS or
446           // ES+GS merged shader on gfx9+.
447           GitPtrLo = AMDGPU::SGPR8;
448           break;
449         default:
450           break;
451       }
452     }
453     MF.getRegInfo().addLiveIn(GitPtrLo);
454     MBB.addLiveIn(GitPtrLo);
455     BuildMI(MBB, I, DL, SMovB32, RsrcLo)
456       .addReg(GitPtrLo)
457       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
458 
    // We now have the GIT ptr; get the scratch descriptor from the entry at
    // offset 0 (or offset 16 for a compute shader).
461     MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
462     const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
463     auto MMO = MF.getMachineMemOperand(PtrInfo,
464                                        MachineMemOperand::MOLoad |
465                                            MachineMemOperand::MOInvariant |
466                                            MachineMemOperand::MODereferenceable,
467                                        16, Align(4));
468     unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
469     const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
470     unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
471     BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
472       .addReg(Rsrc01)
473       .addImm(EncodedOffset) // offset
474       .addImm(0) // glc
475       .addImm(0) // dlc
476       .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
477       .addMemOperand(MMO);
478   } else if (ST.isMesaGfxShader(Fn) ||
479              (PreloadedScratchRsrcReg == AMDGPU::NoRegister)) {
480     assert(!ST.isAmdHsaOrMesa(Fn));
481     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
482 
483     Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
484     Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
485 
486     // Use relocations to get the pointer, and setup the other bits manually.
487     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
488 
489     if (MFI->hasImplicitBufferPtr()) {
490       Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
491 
492       if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
493         const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
494 
495         BuildMI(MBB, I, DL, Mov64, Rsrc01)
496           .addReg(MFI->getImplicitBufferPtrUserSGPR())
497           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
498       } else {
499         const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
500 
501         MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
502         auto MMO = MF.getMachineMemOperand(
503             PtrInfo,
504             MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
505                 MachineMemOperand::MODereferenceable,
506             8, Align(4));
507         BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
508           .addReg(MFI->getImplicitBufferPtrUserSGPR())
509           .addImm(0) // offset
510           .addImm(0) // glc
511           .addImm(0) // dlc
512           .addMemOperand(MMO)
513           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
514 
515         MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
516         MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
517       }
518     } else {
519       Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
520       Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
521 
522       BuildMI(MBB, I, DL, SMovB32, Rsrc0)
523         .addExternalSymbol("SCRATCH_RSRC_DWORD0")
524         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
525 
526       BuildMI(MBB, I, DL, SMovB32, Rsrc1)
527         .addExternalSymbol("SCRATCH_RSRC_DWORD1")
528         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
530     }
531 
532     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
533       .addImm(Rsrc23 & 0xffffffff)
534       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
535 
536     BuildMI(MBB, I, DL, SMovB32, Rsrc3)
537       .addImm(Rsrc23 >> 32)
538       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
539   } else if (ST.isAmdHsaOrMesa(Fn)) {
540     assert(PreloadedScratchRsrcReg != AMDGPU::NoRegister);
541 
542     if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
543       BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
544           .addReg(PreloadedScratchRsrcReg, RegState::Kill);
545     }
546   }
547 
548   // Add the scratch wave offset into the scratch RSRC.
549   //
550   // We only want to update the first 48 bits, which is the base address
551   // pointer, without touching the adjacent 16 bits of flags. We know this add
552   // cannot carry-out from bit 47, otherwise the scratch allocation would be
553   // impossible to fit in the 48-bit global address space.
554   //
555   // TODO: Evaluate if it is better to just construct an SRD using the flat
556   // scratch init and some constants rather than update the one we are passed.
557   Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
558   Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
559 
560   // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
561   // the kernel body via inreg arguments.
562   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
563       .addReg(ScratchRsrcSub0)
564       .addReg(ScratchWaveOffsetReg)
565       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
566   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
567       .addReg(ScratchRsrcSub1)
568       .addImm(0)
569       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
570 }
571 
572 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
573   switch (ID) {
574   case TargetStackID::Default:
575   case TargetStackID::NoAlloc:
576   case TargetStackID::SGPRSpill:
577     return true;
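  // SVE stack slots are AArch64-specific and never appear on this target.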
578   case TargetStackID::SVEVector:
579     return false;
580   }
581   llvm_unreachable("Invalid TargetStackID::Value");
582 }
583 
584 void SIFrameLowering::emitPrologue(MachineFunction &MF,
585                                    MachineBasicBlock &MBB) const {
586   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
587   if (FuncInfo->isEntryFunction()) {
588     emitEntryFunctionPrologue(MF, MBB);
589     return;
590   }
591 
592   const MachineFrameInfo &MFI = MF.getFrameInfo();
593   MachineRegisterInfo &MRI = MF.getRegInfo();
594   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
595   const SIInstrInfo *TII = ST.getInstrInfo();
596   const SIRegisterInfo &TRI = TII->getRegisterInfo();
597 
598   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
599   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
600   LivePhysRegs LiveRegs;
601 
602   MachineBasicBlock::iterator MBBI = MBB.begin();
603   DebugLoc DL;
604 
605   bool HasFP = false;
606   uint32_t NumBytes = MFI.getStackSize();
607   uint32_t RoundedSize = NumBytes;
608   // To avoid clobbering VGPRs in lanes that weren't active on function entry,
609   // turn on all lanes before doing the spill to memory.
610   unsigned ScratchExecCopy = AMDGPU::NoRegister;
611 
612   // Emit the copy if we need an FP, and are using a free SGPR to save it.
613   if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
617   }
618 
619   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
620          : FuncInfo->getSGPRSpillVGPRs()) {
621     if (!Reg.FI.hasValue())
622       continue;
623 
624     if (ScratchExecCopy == AMDGPU::NoRegister) {
625       if (LiveRegs.empty()) {
626         LiveRegs.init(TRI);
627         LiveRegs.addLiveIns(MBB);
628         if (FuncInfo->SGPRForFPSaveRestoreCopy)
629           LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
630       }
631 
632       ScratchExecCopy
633         = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
634                                            *TRI.getWaveMaskRegClass());
635       assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
636 
637       const unsigned OrSaveExec = ST.isWave32() ?
638         AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
639       BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
640               ScratchExecCopy)
641         .addImm(-1);
642     }
643 
644     buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
645                      FuncInfo->getScratchRSrcReg(),
646                      StackPtrReg,
647                      Reg.FI.getValue());
648   }
649 
650   if (ScratchExecCopy != AMDGPU::NoRegister) {
651     // FIXME: Split block and make terminator.
652     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
653     unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
654     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
655       .addReg(ScratchExecCopy, RegState::Kill);
656     LiveRegs.addReg(ScratchExecCopy);
657   }
658 
660   if (FuncInfo->FramePointerSaveIndex) {
661     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
662     assert(!MFI.isDeadObjectIndex(FI) &&
663            MFI.getStackID(FI) == TargetStackID::SGPRSpill);
664     ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
665       = FuncInfo->getSGPRToVGPRSpills(FI);
666     assert(Spill.size() == 1);
667 
668     // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR.
670     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
671             Spill[0].VGPR)
672       .addReg(FramePtrReg)
673       .addImm(Spill[0].Lane)
674       .addReg(Spill[0].VGPR, RegState::Undef);
675   }
676 
677   if (TRI.needsStackRealignment(MF)) {
678     HasFP = true;
679     const unsigned Alignment = MFI.getMaxAlign().value();
680 
681     RoundedSize += Alignment;
682     if (LiveRegs.empty()) {
683       LiveRegs.init(TRI);
684       LiveRegs.addLiveIns(MBB);
685       LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
686     }
687 
688     unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
689         MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
690     assert(ScratchSPReg != AMDGPU::NoRegister &&
691            ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
692 
    // Realign the incoming SP into the frame pointer:
    //   s_add_u32 tmp, s32, (Alignment - 1) * WavefrontSize
    //   s_and_b32 fp, tmp, -(Alignment * WavefrontSize)
695     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
696       .addReg(StackPtrReg)
697       .addImm((Alignment - 1) * ST.getWavefrontSize())
698       .setMIFlag(MachineInstr::FrameSetup);
699     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
700       .addReg(ScratchSPReg, RegState::Kill)
701       .addImm(-Alignment * ST.getWavefrontSize())
702       .setMIFlag(MachineInstr::FrameSetup);
703     FuncInfo->setIsStackRealigned(true);
704   } else if ((HasFP = hasFP(MF))) {
705     // If we need a base pointer, set it up here. It's whatever the value of
706     // the stack pointer is at this point. Any variable size objects will be
707     // allocated after this, so we can still use the base pointer to reference
708     // locals.
709     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
710       .addReg(StackPtrReg)
711       .setMIFlag(MachineInstr::FrameSetup);
712   }
713 
714   if (HasFP && RoundedSize != 0) {
715     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
716       .addReg(StackPtrReg)
717       .addImm(RoundedSize * ST.getWavefrontSize())
718       .setMIFlag(MachineInstr::FrameSetup);
719   }
720 
721   assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
722                      FuncInfo->FramePointerSaveIndex)) &&
723          "Needed to save FP but didn't save it anywhere");
724 
725   assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
726                     !FuncInfo->FramePointerSaveIndex)) &&
727          "Saved FP but didn't need it");
728 }
729 
730 void SIFrameLowering::emitEpilogue(MachineFunction &MF,
731                                    MachineBasicBlock &MBB) const {
732   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
733   if (FuncInfo->isEntryFunction())
734     return;
735 
736   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
737   const SIInstrInfo *TII = ST.getInstrInfo();
738   MachineRegisterInfo &MRI = MF.getRegInfo();
739   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
740   LivePhysRegs LiveRegs;
741   DebugLoc DL;
742 
743   const MachineFrameInfo &MFI = MF.getFrameInfo();
744   uint32_t NumBytes = MFI.getStackSize();
745   uint32_t RoundedSize = FuncInfo->isStackRealigned()
746                              ? NumBytes + MFI.getMaxAlign().value()
747                              : NumBytes;
748 
749   if (RoundedSize != 0 && hasFP(MF)) {
750     const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
751     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
752       .addReg(StackPtrReg)
753       .addImm(RoundedSize * ST.getWavefrontSize())
754       .setMIFlag(MachineInstr::FrameDestroy);
755   }
756 
  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameDestroy);
  }
762 
763   if (FuncInfo->FramePointerSaveIndex) {
764     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
765 
766     assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
767            MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
768 
769     ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
770       = FuncInfo->getSGPRToVGPRSpills(FI);
771     assert(Spill.size() == 1);
772     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
773             FuncInfo->getFrameOffsetReg())
774       .addReg(Spill[0].VGPR)
775       .addImm(Spill[0].Lane);
776   }
777 
778   unsigned ScratchExecCopy = AMDGPU::NoRegister;
779   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
780          : FuncInfo->getSGPRSpillVGPRs()) {
781     if (!Reg.FI.hasValue())
782       continue;
783 
784     const SIRegisterInfo &TRI = TII->getRegisterInfo();
785     if (ScratchExecCopy == AMDGPU::NoRegister) {
786       // See emitPrologue
787       if (LiveRegs.empty()) {
788         LiveRegs.init(*ST.getRegisterInfo());
789         LiveRegs.addLiveOuts(MBB);
790         LiveRegs.stepBackward(*MBBI);
791       }
792 
793       ScratchExecCopy = findScratchNonCalleeSaveRegister(
794           MRI, LiveRegs, *TRI.getWaveMaskRegClass());
795       LiveRegs.removeReg(ScratchExecCopy);
796 
797       const unsigned OrSaveExec =
798           ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
799 
800       BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
801         .addImm(-1);
802     }
803 
804     buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
805                       FuncInfo->getScratchRSrcReg(),
806                       FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
807   }
808 
809   if (ScratchExecCopy != AMDGPU::NoRegister) {
810     // FIXME: Split block and make terminator.
811     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
812     MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
813     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
814       .addReg(ScratchExecCopy, RegState::Kill);
815   }
816 }
817 
818 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
819 // memory. They should have been removed by now.
820 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
821   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
822        I != E; ++I) {
823     if (!MFI.isDeadObjectIndex(I))
824       return false;
825   }
826 
827   return true;
828 }
829 
830 #ifndef NDEBUG
831 static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
832                                  Optional<int> FramePointerSaveIndex) {
833   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
834        I != E; ++I) {
835     if (!MFI.isDeadObjectIndex(I) &&
836         MFI.getStackID(I) == TargetStackID::SGPRSpill &&
837         FramePointerSaveIndex && I != FramePointerSaveIndex) {
838       return false;
839     }
840   }
841 
842   return true;
843 }
844 #endif
845 
846 int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
847                                             Register &FrameReg) const {
848   const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
849 
850   FrameReg = RI->getFrameRegister(MF);
851   return MF.getFrameInfo().getObjectOffset(FI);
852 }
853 
854 void SIFrameLowering::processFunctionBeforeFrameFinalized(
855   MachineFunction &MF,
856   RegScavenger *RS) const {
857   MachineFrameInfo &MFI = MF.getFrameInfo();
858 
859   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
860   const SIRegisterInfo *TRI = ST.getRegisterInfo();
861   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
862 
863   FuncInfo->removeDeadFrameIndices(MFI);
864   assert(allSGPRSpillsAreDead(MFI, None) &&
865          "SGPR spill should have been removed in SILowerSGPRSpills");
866 
867   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
868   // but currently hasNonSpillStackObjects is set only from source
869   // allocas. Stack temps produced from legalization are not counted currently.
870   if (!allStackObjectsAreDead(MFI)) {
871     assert(RS && "RegScavenger required if spilling");
872 
873     if (FuncInfo->isEntryFunction()) {
874       int ScavengeFI = MFI.CreateFixedObject(
875         TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
876       RS->addScavengingFrameIndex(ScavengeFI);
877     } else {
878       int ScavengeFI = MFI.CreateStackObject(
879         TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
880         TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
881         false);
882       RS->addScavengingFrameIndex(ScavengeFI);
883     }
884   }
885 }
886 
887 // Only report VGPRs to generic code.
888 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
889                                            BitVector &SavedVGPRs,
890                                            RegScavenger *RS) const {
891   TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
892   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
893   if (MFI->isEntryFunction())
894     return;
895 
896   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
897   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
898   const SIRegisterInfo *TRI = ST.getRegisterInfo();
899 
900   // Ignore the SGPRs the default implementation found.
901   SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
902 
903   // hasFP only knows about stack objects that already exist. We're now
904   // determining the stack slots that will be created, so we have to predict
905   // them. Stack objects force FP usage with calls.
906   //
907   // Note a new VGPR CSR may be introduced if one is used for the spill, but we
908   // don't want to report it here.
909   //
910   // FIXME: Is this really hasReservedCallFrame?
911   const bool WillHaveFP =
912       FrameInfo.hasCalls() &&
913       (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
914 
915   // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
916   // so don't allow the default insertion to handle them.
917   for (auto SSpill : MFI->getSGPRSpillVGPRs())
918     SavedVGPRs.reset(SSpill.VGPR);
919 
920   const bool HasFP = WillHaveFP || hasFP(MF);
921   if (!HasFP)
922     return;
923 
924   if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
925     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
926                                                     TargetStackID::SGPRSpill);
927 
928     // If there is already a VGPR with free lanes, use it. We may already have
929     // to pay the penalty for spilling a CSR VGPR.
930     if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
931       llvm_unreachable("allocate SGPR spill should have worked");
932 
933     MFI->FramePointerSaveIndex = NewFI;
934 
935     LLVM_DEBUG(
936       auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
938              << ':' << Spill.Lane << '\n');
939     return;
940   }
941 
942   MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
943 
944   if (!MFI->SGPRForFPSaveRestoreCopy) {
945     // There's no free lane to spill, and no free register to save FP, so we're
946     // forced to spill another VGPR to use for the spill.
947     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
948                                                     TargetStackID::SGPRSpill);
949     if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
950       llvm_unreachable("allocate SGPR spill should have worked");
951     MFI->FramePointerSaveIndex = NewFI;
952 
953     LLVM_DEBUG(
954       auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
955       dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
956              << ':' << Spill.Lane << '\n';);
957   } else {
958     LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
959                printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
960   }
961 }
962 
963 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
964                                                BitVector &SavedRegs,
965                                                RegScavenger *RS) const {
966   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
967   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
968   if (MFI->isEntryFunction())
969     return;
970 
971   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
972   const SIRegisterInfo *TRI = ST.getRegisterInfo();
973 
974   // The SP is specifically managed and we don't want extra spills of it.
975   SavedRegs.reset(MFI->getStackPtrOffsetReg());
976   SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
977 }
978 
979 bool SIFrameLowering::assignCalleeSavedSpillSlots(
980     MachineFunction &MF, const TargetRegisterInfo *TRI,
981     std::vector<CalleeSavedInfo> &CSI) const {
982   if (CSI.empty())
983     return true; // Early exit if no callee saved registers are modified!
984 
985   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
986   if (!FuncInfo->SGPRForFPSaveRestoreCopy)
987     return false;
988 
989   for (auto &CS : CSI) {
990     if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
991       if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
992         CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
993       break;
994     }
995   }
996 
997   return false;
998 }
999 
1000 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1001   MachineFunction &MF,
1002   MachineBasicBlock &MBB,
1003   MachineBasicBlock::iterator I) const {
1004   int64_t Amount = I->getOperand(0).getImm();
1005   if (Amount == 0)
1006     return MBB.erase(I);
1007 
1008   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1009   const SIInstrInfo *TII = ST.getInstrInfo();
1010   const DebugLoc &DL = I->getDebugLoc();
1011   unsigned Opc = I->getOpcode();
1012   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1013   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1014 
1015   if (!hasReservedCallFrame(MF)) {
1016     Amount = alignTo(Amount, getStackAlign());
1017     assert(isUInt<32>(Amount) && "exceeded stack address space size");
1018     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1019     unsigned SPReg = MFI->getStackPtrOffsetReg();
1020 
1021     unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1022     BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1023       .addReg(SPReg)
1024       .addImm(Amount * ST.getWavefrontSize());
1025   } else if (CalleePopAmount != 0) {
1026     llvm_unreachable("is this used?");
1027   }
1028 
1029   return MBB.erase(I);
1030 }
1031 
1032 bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1033   const MachineFrameInfo &MFI = MF.getFrameInfo();
1034 
1035   // For entry functions we can use an immediate offset in most cases, so the
1036   // presence of calls doesn't imply we need a distinct frame pointer.
1037   if (MFI.hasCalls() &&
1038       !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1039     // All offsets are unsigned, so need to be addressed in the same direction
1040     // as stack growth.
1041 
1042     // FIXME: This function is pretty broken, since it can be called before the
1043     // frame layout is determined or CSR spills are inserted.
1044     return MFI.getStackSize() != 0;
1045   }
1046 
1047   return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
1048     MFI.hasStackMap() || MFI.hasPatchPoint() ||
1049     MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
1050     MF.getTarget().Options.DisableFramePointerElim(MF);
1051 }
1052