//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  return MCRegister();
}

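// Pick a save location for the FP/BP, trying progressively cheaper options:
// a free lane in an already-reserved spill VGPR, then an entirely unused
// SGPR, then a lane in a freshly reserved VGPR, and finally a scratch memory
// slot. On return, exactly one of TempSGPR or FrameIndex is set.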
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to emit stack operations specially here because the frame register
// in use at this point differs from the one getFrameRegister would return for
// the rest of the function.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LivePhysRegs &LiveRegs, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

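// Counterpart to buildPrologSpill: reload SpillReg from its stack slot, again
// addressing relative to the stack pointer.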
static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LivePhysRegs &LiveRegs, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
}

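// Materialize the 64-bit pointer to the GIT (global information table) in
// TargetReg: the low half comes from the preloaded GIT-pointer-low SGPR, and
// the high half from the amdgpu-git-ptr-high attribute if set, otherwise from
// the current PC via S_GETPC_B64.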
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
    .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills, since there is no user-facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
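      // On GFX10+ the flat scratch base is not an SGPR-addressable register
      // pair; program FLAT_SCR_LO/HI through s_setreg, writing the full 32
      // bits of each half (WIDTH_M1 = 31).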
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

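// When scratch is accessed through the buffer RSRC, per-lane allocations are
// interleaved across the wave, so per-lane byte sizes and offsets must be
// scaled by the wavefront size. With flat scratch the stack pointer is a
// plain per-lane byte offset and no scaling is needed.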
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to avoid crashing in situations where an error has already
  // been emitted.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found overlaps the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

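  // Flat scratch setup is needed whenever scratch may actually be addressed:
  // FLAT_SCR is read, there are calls (callees may use scratch), or flat
  // scratch is enabled and some stack object is live.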
  bool NeedsFlatScratchInit =
      MFI->hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoRegister`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

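// Lazily initialize LiveRegs at the insertion point: from the block live-ins
// when inserting a prolog, or from the live-outs stepped backward across the
// instruction at MBBI when inserting an epilog.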
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
                         const SIMachineFunctionInfo *FuncInfo,
                         MachineFunction &MF, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveRegs.empty()) {
    LiveRegs.init(TRI);
    if (IsProlog) {
      LiveRegs.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }
}

// Activate all lanes; returns the saved EXEC mask.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveRegs.addReg(ScratchExecCopy);

  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);

  return ScratchExecCopy;
}

// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
// Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}

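// Non-entry prologue: spill the reserved SGPR-spill and WWM VGPRs with all
// lanes enabled, save the FP/BP to their assigned SGPR copy, VGPR lane, or
// memory slot, then establish the new FP/BP and bump the SP.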
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // VGPRs used for SGPR->VGPR spills
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                             /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
                     *Reg.FI);
  }

  // VGPRs used for Whole Wave Mode
  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                     *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

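  // If the FP was assigned a scratch memory slot, it has to be staged through
  // a temporary VGPR, since only vector memory operations can reach scratch.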
  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     FramePtrFI);
  }

  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     BasePtrFI);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      // Only add the copies that were actually reserved; one of FP/BP may be
      // absent.
      for (MCPhysReg Reg : TempSGPRs)
        LiveRegs.addReg(Reg);
    }
  }

  if (TRI.hasStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex) ||
                   EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

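  // Tear down in reverse order of the prologue: pop the stack, restore the
  // FP/BP from their SGPR copies or spill slots, then reload the reserved
  // SGPR-spill and WWM VGPRs with all lanes enabled.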
  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                       Reg.VGPR, *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                       *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

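  // On subtargets with MAI instructions, otherwise-unused AGPRs can absorb
  // VGPR spills (and VGPRs can absorb AGPR spills), turning memory spills
  // into register copies.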
  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            // FIXME: change to enterBasicBlockEnd()
            RS->enterBasicBlock(MBB);
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        }
      }
    }

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead update them with the
        // correct register value. But it is unclear whether the register value
        // alone is enough to describe the spilled value.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }
  }

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // On gfx908 there are no AGPR loads or stores, so spilling an AGPR also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

  // We have to anticipate introducing CSR VGPR spills, or a spill of the
  // caller-saved VGPR reserved for SGPR spills, since we now always create a
  // stack entry for it even if there are no other stack objects: an FP is
  // required whenever there is both a call and a stack.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
      !FuncInfo->SGPRForBPSaveRestoreCopy)
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  unsigned NumModifiedRegs = 0;

  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

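  // Point the CSI entries for the FP/BP at the reserved SGPRs so that spill
  // insertion emits register copies rather than memory saves; stop early once
  // every modified register has been rewritten.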
  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs == 0)
        break;
    } else if (CS.getReg() == BasePtrReg &&
               FuncInfo->SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs == 0)
        break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

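    // The SP is tracked in swizzled (per-wave) units when scratch goes
    // through the buffer RSRC; scale the per-lane byte amount accordingly
    // (see getScratchScaleFactor).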
    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known to be 0, so we never really need to set
// up an explicit register for it. However, DisableFramePointerElim will force
// us to use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known to be 0 on entry to kernels, we never really need
// an FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlaps many of the cases where an
// ordinary function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}