//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }
  // If we require an unused register, the caller is prepared to handle failure
  // and has an alternative plan. Otherwise, this must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to emit stack operations specially here because the frame register
// used differs from the one getFrameRegister would return for the rest of the
// function.
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

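  // The MUBUF offset field is a 12-bit unsigned immediate, so the spill offset
  // can only be folded into the instruction when it fits; otherwise it is
  // materialized into a scratch VGPR below and the OFFEN form is used.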
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, Register SpillReg,
                              Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

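  // As in buildPrologSpill, the offset can only be encoded directly when it
  // fits in the 12-bit MUBUF immediate; otherwise it goes through a VGPR.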
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addImm(0) // swz
    .addMemOperand(MMO);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

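  // FLAT_SCRATCH_INIT is preloaded into a pair of user SGPRs; mark it live-in
  // so the setup code below has a valid definition to read from.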
  Register FlatScratchInitReg =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

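  // Round the preloaded SGPR count up to a whole number of 4-SGPR tuples so
  // the candidate SGPR_128 registers considered below start past all of the
  // preloaded inputs.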
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    //
    // FIXME: The preloaded SGPR count is not accurate for shaders as the
    // scratch wave offset may be in a fixed SGPR or
    // SITargetLowering::allocateSystemSGPRs may choose some free SGPR for the
    // scratch wave offset. We explicitly avoid the scratch wave offset to
    // account for this.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, ScratchWaveOffsetReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register ScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `AMDGPU::NoRegister` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg =
      getEntryFunctionReservedScratchRsrcReg(MF, ScratchWaveOffsetReg);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg != AMDGPU::NoRegister &&
        PreloadedScratchRsrcReg != AMDGPU::NoRegister) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

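  // MachineFrameInfo stack sizes are per-lane byte counts, while the SGPR
  // stack pointer holds a byte offset into the wave's swizzled scratch, hence
  // the scaling by the wavefront size below.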
  if (MF.getFrameInfo().hasCalls()) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg != AMDGPU::NoRegister) {
    MRI.addLiveIn(ScratchWaveOffsetReg);
    MBB.addLiveIn(ScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg != AMDGPU::NoRegister) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoRegister`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
        case CallingConv::AMDGPU_HS:
        case CallingConv::AMDGPU_GS:
          // Low GIT address is passed in s8 rather than s0 for an LS+HS or
          // ES+GS merged shader on gfx9+.
          GitPtrLo = AMDGPU::SGPR8;
          break;
        default:
          break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
  } else if (ST.isMesaGfxShader(Fn) ||
             (PreloadedScratchRsrcReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg != AMDGPU::NoRegister);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

547 
548   // Add the scratch wave offset into the scratch RSRC.
549   //
550   // We only want to update the first 48 bits, which is the base address
551   // pointer, without touching the adjacent 16 bits of flags. We know this add
552   // cannot carry-out from bit 47, otherwise the scratch allocation would be
553   // impossible to fit in the 48-bit global address space.
554   //
555   // TODO: Evaluate if it is better to just construct an SRD using the flat
556   // scratch init and some constants rather than update the one we are passed.
557   Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
558   Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
559 
560   // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
561   // the kernel body via inreg arguments.
562   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
563       .addReg(ScratchRsrcSub0)
564       .addReg(ScratchWaveOffsetReg)
565       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
566   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
567       .addReg(ScratchRsrcSub1)
568       .addImm(0)
569       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
570 }
571 
572 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
573   switch (ID) {
574   case TargetStackID::Default:
575   case TargetStackID::NoAlloc:
576   case TargetStackID::SGPRSpill:
577     return true;
578   case TargetStackID::SVEVector:
579     return false;
580   }
581   llvm_unreachable("Invalid TargetStackID::Value");
582 }
583 
584 void SIFrameLowering::emitPrologue(MachineFunction &MF,
585                                    MachineBasicBlock &MBB) const {
586   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
587   if (FuncInfo->isEntryFunction()) {
588     emitEntryFunctionPrologue(MF, MBB);
589     return;
590   }
591 
592   const MachineFrameInfo &MFI = MF.getFrameInfo();
593   MachineRegisterInfo &MRI = MF.getRegInfo();
594   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
595   const SIInstrInfo *TII = ST.getInstrInfo();
596   const SIRegisterInfo &TRI = TII->getRegisterInfo();
597 
598   Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
599   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
600   LivePhysRegs LiveRegs;
601 
602   MachineBasicBlock::iterator MBBI = MBB.begin();
603   DebugLoc DL;
604 
605   bool HasFP = false;
606   uint32_t NumBytes = MFI.getStackSize();
607   uint32_t RoundedSize = NumBytes;
608   // To avoid clobbering VGPRs in lanes that weren't active on function entry,
609   // turn on all lanes before doing the spill to memory.
610   Register ScratchExecCopy;
611 
612   // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
    // Make the register live throughout the function.
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
              ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
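    // Restore the exec mask saved above now that the CSR VGPR stores are done.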
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR.
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    Register ScratchSPReg = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister &&
           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);

    // s_add_u32 tmp_reg, s32, (Alignment - 1) * wavefront_size
    // s_and_b32 fp, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a frame pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable-sized objects will be
    // allocated after this, so we can still use the frame pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

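  // Allocate the frame by bumping SP; the size is scaled from per-lane bytes
  // to per-wave bytes by the wavefront size, matching the entry-function setup
  // above.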
  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (ScratchExecCopy == AMDGPU::NoRegister) {
      // See emitPrologue
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
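    // Restore exec now that the CSR VGPR reloads are complete.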
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}
#endif

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

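    // Reserve an emergency spill slot for the register scavenger. Entry
    // functions place it as a fixed object at offset 0; other functions use an
    // ordinary stack object.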
    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;

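  // FP save strategy: prefer a free VGPR lane, then a copy into an unused
  // SGPR, and as a last resort force a new SGPR-to-VGPR spill slot (which may
  // require spilling a CSR VGPR).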
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;

  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

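  // Without a reserved call frame, the call frame pseudos lower to real SP
  // adjustments; the amount is again scaled from per-lane to per-wave bytes.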
  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
    MFI.hasStackMap() || MFI.hasPatchPoint() ||
    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
    MF.getTarget().Options.DisableFramePointerElim(MF);
}