//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

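// A wavefront has at most 64 lanes, so a single VGPR provides at most 64
// 32-bit SGPR spill slots.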
#define MAX_LANES 64

using namespace llvm;

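// Record which special inputs this function requires, based on its calling
// convention, function attributes, and subtarget features.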
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      KernargSegmentPtr = true;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F->arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset
          = ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }
}

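// Allocate four consecutive user SGPRs (an SReg_128) for the private segment
// buffer resource descriptor.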
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

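// Allocate a 64-bit user SGPR pair holding the pointer to the dispatch packet.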
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

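// Allocate a 64-bit user SGPR pair holding the queue pointer.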
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

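// Allocate a 64-bit user SGPR pair holding the base address of the kernel
// argument segment.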
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

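// Allocate a 64-bit user SGPR pair holding the dispatch ID.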
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

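// Allocate a 64-bit user SGPR pair used to initialize flat scratch.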
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

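// Allocate a 64-bit user SGPR pair holding the implicit buffer pointer used by
// Mesa graphics shaders.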
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

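// Scan the null-terminated callee-saved register list \p CSRegs for \p Reg.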
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

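  // Each spilled 32-bit SGPR occupies one lane of a spill VGPR.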
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
        // TODO: Should this be a CreateSpillStackObject? This is technically a
        // weird CSR spill.
        CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as a live-in to all blocks to avoid the machine
      // verifier complaining about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

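// Remove the dummy stack objects that tracked SGPR spills; once the spills
// have been placed in VGPR lanes they no longer need real stack slots.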
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}
290