//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

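// Maximum number of lanes in a wavefront (wave64).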
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
    ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
    FrameOffsetReg(AMDGPU::FP_REG),
    StackPtrOffsetReg(AMDGPU::SP_REG),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    WorkItemIDXVGPR(AMDGPU::NoRegister),
    WorkItemIDYVGPR(AMDGPU::NoRegister),
    WorkItemIDZVGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    // FIXME: Not really a system SGPR.
    PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      KernargSegmentPtr = true;
  }

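  // The calling convention implies certain inputs: kernel entry points always
  // receive the X workgroup ID and X workitem ID, and pixel shaders record
  // their initial PS input address mask.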
  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F->arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

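    // Any scratch access needs the per-wave byte offset into the private
    // segment, so request it whenever there are stack objects or spills.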
    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
    }
  }

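  // Under the AMD code object v2 ABI, inputs such as the dispatch pointer,
  // queue pointer, and dispatch ID arrive in user SGPRs and are requested via
  // function attributes.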
  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }
}

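/// Reserve four consecutive user SGPRs for the 128-bit private segment buffer
/// (the scratch resource descriptor).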
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

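/// Reserve two user SGPRs for the 64-bit dispatch packet pointer.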
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

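/// Reserve two user SGPRs for the 64-bit queue pointer.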
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

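/// Reserve two user SGPRs for the 64-bit kernarg segment pointer.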
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

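/// Reserve two user SGPRs for the 64-bit dispatch ID.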
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

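/// Reserve two user SGPRs for the flat scratch initialization value.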
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

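/// Reserve two user SGPRs for the implicit buffer pointer used by Mesa GFX
/// shaders.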
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return ImplicitBufferPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

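  // Each VGPR lane holds one 32-bit SGPR, so the spill needs one lane per
  // four bytes of the spilled object.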
  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

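    // A lane index of 0 means the current spill VGPR (if any) is full, so
    // allocate a fresh one.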
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

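/// Remove the stack objects for frame indices whose SGPR spills were assigned
/// to VGPR lanes; they no longer need frame storage.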
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}