1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUArgumentUsageInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/IR/CallingConv.h"
21 #include "llvm/IR/Function.h"
22 #include <cassert>
23 #include <vector>
24 
25 #define MAX_LANES 64
26 
27 using namespace llvm;
28 
29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30   : AMDGPUMachineFunction(MF),
31     BufferPSV(*(MF.getSubtarget().getInstrInfo())),
32     ImagePSV(*(MF.getSubtarget().getInstrInfo())),
33     PrivateSegmentBuffer(false),
34     DispatchPtr(false),
35     QueuePtr(false),
36     KernargSegmentPtr(false),
37     DispatchID(false),
38     FlatScratchInit(false),
39     GridWorkgroupCountX(false),
40     GridWorkgroupCountY(false),
41     GridWorkgroupCountZ(false),
42     WorkGroupIDX(false),
43     WorkGroupIDY(false),
44     WorkGroupIDZ(false),
45     WorkGroupInfo(false),
46     PrivateSegmentWaveByteOffset(false),
47     WorkItemIDX(false),
48     WorkItemIDY(false),
49     WorkItemIDZ(false),
50     ImplicitBufferPtr(false),
51     ImplicitArgPtr(false) {
52   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
53   const Function *F = MF.getFunction();
54   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
55   WavesPerEU = ST.getWavesPerEU(*F);
56 
57   if (!isEntryFunction()) {
58     // Non-entry functions have no special inputs for now, other registers
59     // required for scratch access.
60     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
61     ScratchWaveOffsetReg = AMDGPU::SGPR4;
62     FrameOffsetReg = AMDGPU::SGPR5;
63     StackPtrOffsetReg = AMDGPU::SGPR32;
64 
65     ArgInfo.PrivateSegmentBuffer =
66       ArgDescriptor::createRegister(ScratchRSrcReg);
67     ArgInfo.PrivateSegmentWaveByteOffset =
68       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
69 
70     if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
71       ImplicitArgPtr = true;
72   } else {
73     if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
74       KernargSegmentPtr = true;
75   }
76 
77   CallingConv::ID CC = F->getCallingConv();
78   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
79     if (!F->arg_empty())
80       KernargSegmentPtr = true;
81     WorkGroupIDX = true;
82     WorkItemIDX = true;
83   } else if (CC == CallingConv::AMDGPU_PS) {
84     PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
85   }
86 
87   if (ST.debuggerEmitPrologue()) {
88     // Enable everything.
89     WorkGroupIDX = true;
90     WorkGroupIDY = true;
91     WorkGroupIDZ = true;
92     WorkItemIDX = true;
93     WorkItemIDY = true;
94     WorkItemIDZ = true;
95   } else {
96     if (F->hasFnAttribute("amdgpu-work-group-id-x"))
97       WorkGroupIDX = true;
98 
99     if (F->hasFnAttribute("amdgpu-work-group-id-y"))
100       WorkGroupIDY = true;
101 
102     if (F->hasFnAttribute("amdgpu-work-group-id-z"))
103       WorkGroupIDZ = true;
104 
105     if (F->hasFnAttribute("amdgpu-work-item-id-x"))
106       WorkItemIDX = true;
107 
108     if (F->hasFnAttribute("amdgpu-work-item-id-y"))
109       WorkItemIDY = true;
110 
111     if (F->hasFnAttribute("amdgpu-work-item-id-z"))
112       WorkItemIDZ = true;
113   }
114 
115   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
116   bool MaySpill = ST.isVGPRSpillingEnabled(*F);
117   bool HasStackObjects = FrameInfo.hasStackObjects();
118 
119   if (isEntryFunction()) {
120     // X, XY, and XYZ are the only supported combinations, so make sure Y is
121     // enabled if Z is.
122     if (WorkItemIDZ)
123       WorkItemIDY = true;
124 
125     if (HasStackObjects || MaySpill) {
126       PrivateSegmentWaveByteOffset = true;
127 
128     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
129     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
130         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
131       ArgInfo.PrivateSegmentWaveByteOffset
132         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
133     }
134   }
135 
136   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
137   if (IsCOV2) {
138     if (HasStackObjects || MaySpill)
139       PrivateSegmentBuffer = true;
140 
141     if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
142       DispatchPtr = true;
143 
144     if (F->hasFnAttribute("amdgpu-queue-ptr"))
145       QueuePtr = true;
146 
147     if (F->hasFnAttribute("amdgpu-dispatch-id"))
148       DispatchID = true;
149   } else if (ST.isMesaGfxShader(MF)) {
150     if (HasStackObjects || MaySpill)
151       ImplicitBufferPtr = true;
152   }
153 
154   if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
155     KernargSegmentPtr = true;
156 
157   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
158     // TODO: This could be refined a lot. The attribute is a poor way of
159     // detecting calls that may require it before argument lowering.
160     if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
161       FlatScratchInit = true;
162   }
163 }
164 
165 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
166   const SIRegisterInfo &TRI) {
167   ArgInfo.PrivateSegmentBuffer =
168     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
169     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
170   NumUserSGPRs += 4;
171   return ArgInfo.PrivateSegmentBuffer.getRegister();
172 }
173 
174 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
175   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
176     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
177   NumUserSGPRs += 2;
178   return ArgInfo.DispatchPtr.getRegister();
179 }
180 
181 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
182   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
183     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
184   NumUserSGPRs += 2;
185   return ArgInfo.QueuePtr.getRegister();
186 }
187 
188 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
189   ArgInfo.KernargSegmentPtr
190     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
191     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
192   NumUserSGPRs += 2;
193   return ArgInfo.KernargSegmentPtr.getRegister();
194 }
195 
196 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
197   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
198     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
199   NumUserSGPRs += 2;
200   return ArgInfo.DispatchID.getRegister();
201 }
202 
203 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
204   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
205     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
206   NumUserSGPRs += 2;
207   return ArgInfo.FlatScratchInit.getRegister();
208 }
209 
210 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
211   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
212     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
213   NumUserSGPRs += 2;
214   return ArgInfo.ImplicitBufferPtr.getRegister();
215 }
216 
217 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
218   for (unsigned I = 0; CSRegs[I]; ++I) {
219     if (CSRegs[I] == Reg)
220       return true;
221   }
222 
223   return false;
224 }
225 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// Each spilled SGPR dword occupies one lane of a VGPR; a VGPR holds
/// WavefrontSize lanes, so consecutive requests pack into the same VGPR
/// until it is full, then a fresh unused VGPR is claimed. Returns true on
/// success; returns false (and undoes the partial allocation) if no VGPR
/// is available.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // One lane per 4-byte dword of the spilled object.
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    // Lane index within the current VGPR; 0 means the previous VGPR (if any)
    // is full and a new one must be found.
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        // Roll back the lanes consumed so far by this (failed) request.
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved and this function makes calls,
      // reserve a stack slot so its original value can be preserved.
      Optional<int> CSRSpillFI;
      if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
        // TODO: Should this be a CreateSpillStackObject? This is technically a
        // weird CSR spill.
        CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue packing lanes into the most recently claimed VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
287 
288 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
289   for (auto &R : SGPRToVGPRSpills)
290     MFI.RemoveStackObject(R.first);
291 }
292