1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUArgumentUsageInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 #include "Utils/AMDGPUBaseInfo.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Function.h"
23 #include <cassert>
24 #include <vector>
25 
26 #define MAX_LANES 64
27 
28 using namespace llvm;
29 
30 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
31   : AMDGPUMachineFunction(MF),
32     PrivateSegmentBuffer(false),
33     DispatchPtr(false),
34     QueuePtr(false),
35     KernargSegmentPtr(false),
36     DispatchID(false),
37     FlatScratchInit(false),
38     GridWorkgroupCountX(false),
39     GridWorkgroupCountY(false),
40     GridWorkgroupCountZ(false),
41     WorkGroupIDX(false),
42     WorkGroupIDY(false),
43     WorkGroupIDZ(false),
44     WorkGroupInfo(false),
45     PrivateSegmentWaveByteOffset(false),
46     WorkItemIDX(false),
47     WorkItemIDY(false),
48     WorkItemIDZ(false),
49     ImplicitBufferPtr(false),
50     ImplicitArgPtr(false),
51     GITPtrHigh(0xffffffff),
52     HighBitsOf32BitAddress(0) {
53   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
54   const Function &F = MF.getFunction();
55   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
56   WavesPerEU = ST.getWavesPerEU(F);
57 
58   if (!isEntryFunction()) {
59     // Non-entry functions have no special inputs for now, other registers
60     // required for scratch access.
61     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
62     ScratchWaveOffsetReg = AMDGPU::SGPR4;
63     FrameOffsetReg = AMDGPU::SGPR5;
64     StackPtrOffsetReg = AMDGPU::SGPR32;
65 
66     ArgInfo.PrivateSegmentBuffer =
67       ArgDescriptor::createRegister(ScratchRSrcReg);
68     ArgInfo.PrivateSegmentWaveByteOffset =
69       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
70 
71     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
72       ImplicitArgPtr = true;
73   } else {
74     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
75       KernargSegmentPtr = true;
76   }
77 
78   CallingConv::ID CC = F.getCallingConv();
79   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
80     if (!F.arg_empty())
81       KernargSegmentPtr = true;
82     WorkGroupIDX = true;
83     WorkItemIDX = true;
84   } else if (CC == CallingConv::AMDGPU_PS) {
85     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
86   }
87 
88   if (ST.debuggerEmitPrologue()) {
89     // Enable everything.
90     WorkGroupIDX = true;
91     WorkGroupIDY = true;
92     WorkGroupIDZ = true;
93     WorkItemIDX = true;
94     WorkItemIDY = true;
95     WorkItemIDZ = true;
96   } else {
97     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
98       WorkGroupIDX = true;
99 
100     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
101       WorkGroupIDY = true;
102 
103     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
104       WorkGroupIDZ = true;
105 
106     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
107       WorkItemIDX = true;
108 
109     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
110       WorkItemIDY = true;
111 
112     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
113       WorkItemIDZ = true;
114   }
115 
116   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
117   bool MaySpill = ST.isVGPRSpillingEnabled(F);
118   bool HasStackObjects = FrameInfo.hasStackObjects();
119 
120   if (isEntryFunction()) {
121     // X, XY, and XYZ are the only supported combinations, so make sure Y is
122     // enabled if Z is.
123     if (WorkItemIDZ)
124       WorkItemIDY = true;
125 
126     if (HasStackObjects || MaySpill) {
127       PrivateSegmentWaveByteOffset = true;
128 
129     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
130     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
131         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
132       ArgInfo.PrivateSegmentWaveByteOffset
133         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
134     }
135   }
136 
137   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
138   if (IsCOV2) {
139     if (HasStackObjects || MaySpill)
140       PrivateSegmentBuffer = true;
141 
142     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
143       DispatchPtr = true;
144 
145     if (F.hasFnAttribute("amdgpu-queue-ptr"))
146       QueuePtr = true;
147 
148     if (F.hasFnAttribute("amdgpu-dispatch-id"))
149       DispatchID = true;
150   } else if (ST.isMesaGfxShader(MF)) {
151     if (HasStackObjects || MaySpill)
152       ImplicitBufferPtr = true;
153   }
154 
155   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
156     KernargSegmentPtr = true;
157 
158   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
159     // TODO: This could be refined a lot. The attribute is a poor way of
160     // detecting calls that may require it before argument lowering.
161     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
162       FlatScratchInit = true;
163   }
164 
165   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
166   StringRef S = A.getValueAsString();
167   if (!S.empty())
168     S.consumeInteger(0, GITPtrHigh);
169 
170   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
171   S = A.getValueAsString();
172   if (!S.empty())
173     S.consumeInteger(0, HighBitsOf32BitAddress);
174 }
175 
176 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
177   const SIRegisterInfo &TRI) {
178   ArgInfo.PrivateSegmentBuffer =
179     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
180     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
181   NumUserSGPRs += 4;
182   return ArgInfo.PrivateSegmentBuffer.getRegister();
183 }
184 
185 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
186   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
187     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
188   NumUserSGPRs += 2;
189   return ArgInfo.DispatchPtr.getRegister();
190 }
191 
192 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
193   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
194     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
195   NumUserSGPRs += 2;
196   return ArgInfo.QueuePtr.getRegister();
197 }
198 
199 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
200   ArgInfo.KernargSegmentPtr
201     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
202     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
203   NumUserSGPRs += 2;
204   return ArgInfo.KernargSegmentPtr.getRegister();
205 }
206 
207 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
208   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
209     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
210   NumUserSGPRs += 2;
211   return ArgInfo.DispatchID.getRegister();
212 }
213 
214 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
215   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
216     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
217   NumUserSGPRs += 2;
218   return ArgInfo.FlatScratchInit.getRegister();
219 }
220 
221 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
222   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
223     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
224   NumUserSGPRs += 2;
225   return ArgInfo.ImplicitBufferPtr.getRegister();
226 }
227 
228 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
229   for (unsigned I = 0; CSRegs[I]; ++I) {
230     if (CSRegs[I] == Reg)
231       return true;
232   }
233 
234   return false;
235 }
236 
237 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
238 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
239                                                     int FI) {
240   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
241 
242   // This has already been allocated.
243   if (!SpillLanes.empty())
244     return true;
245 
246   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
247   const SIRegisterInfo *TRI = ST.getRegisterInfo();
248   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
249   MachineRegisterInfo &MRI = MF.getRegInfo();
250   unsigned WaveSize = ST.getWavefrontSize();
251 
252   unsigned Size = FrameInfo.getObjectSize(FI);
253   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
254   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
255 
256   int NumLanes = Size / 4;
257 
258   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
259 
260   // Make sure to handle the case where a wide SGPR spill may span between two
261   // VGPRs.
262   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
263     unsigned LaneVGPR;
264     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
265 
266     if (VGPRIndex == 0) {
267       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
268       if (LaneVGPR == AMDGPU::NoRegister) {
269         // We have no VGPRs left for spilling SGPRs. Reset because we will not
270         // partially spill the SGPR to VGPRs.
271         SGPRToVGPRSpills.erase(FI);
272         NumVGPRSpillLanes -= I;
273         return false;
274       }
275 
276       Optional<int> CSRSpillFI;
277       if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
278           isCalleeSavedReg(CSRegs, LaneVGPR)) {
279         CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
280       }
281 
282       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
283 
284       // Add this register as live-in to all blocks to avoid machine verifer
285       // complaining about use of an undefined physical register.
286       for (MachineBasicBlock &BB : MF)
287         BB.addLiveIn(LaneVGPR);
288     } else {
289       LaneVGPR = SpillVGPRs.back().VGPR;
290     }
291 
292     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
293   }
294 
295   return true;
296 }
297 
298 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
299   for (auto &R : SGPRToVGPRSpills)
300     MFI.RemoveStackObject(R.first);
301 }
302 
303 
304 /// \returns VGPR used for \p Dim' work item ID.
305 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
306   switch (Dim) {
307   case 0:
308     assert(hasWorkItemIDX());
309     return AMDGPU::VGPR0;
310   case 1:
311     assert(hasWorkItemIDY());
312     return AMDGPU::VGPR1;
313   case 2:
314     assert(hasWorkItemIDZ());
315     return AMDGPU::VGPR2;
316   }
317   llvm_unreachable("unexpected dimension");
318 }
319 
320 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
321   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
322   return AMDGPU::SGPR0 + NumUserSGPRs;
323 }
324 
325 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
326   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
327 }
328