//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  return MCRegister();
}

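// Decide where to save the FP or BP. In order of preference, mirroring the
// numbered cases in the body below: reuse a free lane in a VGPR that is
// already spilled for SGPR saves, copy to an unused scratch SGPR, spill a
// fresh VGPR lane, or, as a last resort, spill to memory. When a save is
// needed, exactly one of TempSGPR and FrameIndex is set on return.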
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function: these helpers address
// the stack relative to the stack pointer rather than the register that
// getFrameRegister would return.
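// For example (illustrative, not literal output), spilling VGPR v0 at frame
// index FI lowers to roughly one of:
//   buffer_store_dword v0, off, s[rsrc:rsrc+3], s32 offset:<FI>  ; MUBUF
//   scratch_store_dword off, v0, s32 offset:<FI>                 ; flat scratch
// with the restore using the corresponding load opcode.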
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LivePhysRegs &LiveRegs, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LivePhysRegs &LiveRegs, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
}

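// Materialize the 64-bit pointer to the global information table (GIT) in
// TargetReg. The high half comes from the amdgpu-git-ptr-high function
// attribute when one is set; otherwise it is taken from the upper half of the
// PC via S_GETPC_B64. The low half is passed in by the runtime (see
// getGITPtrLoReg).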
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
    .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
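  // The emitted sequence is roughly (register names illustrative):
  //   s_add_u32  flat_lo, flat_lo, <wave offset>
  //   s_addc_u32 flat_hi, flat_hi, 0
  // followed on GFX10+ by two s_setreg_b32 writes into the FLAT_SCR_LO/HI
  // hardware registers; GFX9 instead adds directly into FLAT_SCR_LO/HI.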
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

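// Roughly speaking: with MUBUF scratch the stack and frame pointers count
// bytes of the whole wave's swizzled scratch allocation, so a per-lane
// allocation of N bytes advances them by N * wavefront size; with flat
// scratch they are plain per-lane byte offsets and need no scaling.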
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  bool NeedsFlatScratchInit =
      MFI->hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
                         const SIMachineFunctionInfo *FuncInfo,
                         MachineFunction &MF, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveRegs.empty()) {
    LiveRegs.init(TRI);
    if (IsProlog) {
      LiveRegs.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }
}

// Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveRegs.addReg(ScratchExecCopy);

  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);

  return ScratchExecCopy;
}

// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
// Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
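  // The save/restore pair emitted around these spills looks roughly like:
  //   s_or_saveexec_b64 s[N:N+1], -1   ; save exec, enable all lanes
  //   ...store the spill/WWM VGPRs...
  //   s_mov_b64 exec, s[N:N+1]         ; restore the entry exec mask
  // (or the _b32 forms with exec_lo on wave32 targets); register numbers are
  // illustrative.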
  Register ScratchExecCopy;

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // VGPRs used for SGPR->VGPR spills
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                             /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
                     *Reg.FI);
  }

  // VGPRs used for Whole Wave Mode
  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                     *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     FramePtrFI);
  }

  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     BasePtrFI);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for the FP and/or BP, make the SGPRs used in
  // the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    }
  }

  if (TRI.hasStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex) ||
                   EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                       Reg.VGPR, *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                       *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

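  // When a VGPR spill can be assigned to a free AGPR (or an AGPR spill to a
  // free VGPR), the frame index is eliminated here and the spill becomes a
  // plain register-to-register copy (e.g. v_accvgpr_write_b32 /
  // v_accvgpr_read_b32), avoiding scratch memory traffic entirely.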
  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            // FIXME: change to enterBasicBlockEnd()
            RS->enterBasicBlock(MBB);
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        }
      }
    }

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead update them with the
        // correct register values, but it is not clear that the register value
        // alone is adequate to lower the DIExpression; this should be worked
        // out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }
  }

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}

// Only report VGPRs to generic code.
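// SGPR callee saves are handled separately in determineCalleeSavesSGPR below.
// This also pre-reserves the FP/BP save locations, since hasFP cannot yet see
// the stack objects that will be created for them.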
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there are no AGPR loads and stores, so spilling an AGPR also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

  // We have to anticipate introducing CSR VGPR spills, or a spill of the
  // caller-save VGPR reserved for SGPR spills, since we now always create a
  // stack entry for it even when there are no other stack objects: an FP is
  // required whenever there is a call and a stack.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
      !FuncInfo->SGPRForBPSaveRestoreCopy)
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  unsigned NumModifiedRegs = 0;

  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      // Stop once every modified register has been redirected.
      if (--NumModifiedRegs == 0)
        break;
    } else if (CS.getReg() == BasePtrReg &&
               FuncInfo->SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs == 0)
        break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known to be 0, so we never really need to set
// up an explicit register for it. However, DisableFramePointerElim will force
// us to use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since the
// stack pointer is known 0 on entry to kernels, we never really need an FP
// register. We may need to initialize the stack pointer depending on the frame
// properties, which logically overlaps many of the cases where an ordinary
// function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}