//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

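// A wavefront on these targets has 64 lanes, so a single 32-bit spill VGPR
// can hold at most 64 spilled SGPR values (one per lane).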
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateMemoryPtrUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  // Non-entry functions have no special inputs for now.
  // TODO: Return early for non-entry CCs.

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_PS)
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  if (AMDGPU::isKernel(CC)) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

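/// Allocate four consecutive user SGPRs for the private segment buffer
/// resource descriptor and return the covering 128-bit SReg_128 tuple. As an
/// illustration (the exact registers depend on getNextUserSGPR()): with no
/// user SGPRs allocated yet, this would return SGPR0_SGPR1_SGPR2_SGPR3 and
/// advance NumUserSGPRs to 4.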
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

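// Each of the 64-bit kernel inputs below (dispatch pointer, queue pointer,
// kernarg segment pointer, dispatch ID, flat scratch init, and the non-HSA
// private memory pointer) is passed in an aligned pair of user SGPRs. The
// add* methods hand the pairs out as SReg_64 tuples in the order they are
// called, bumping NumUserSGPRs by two each time.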
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
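/// Returns true once lanes have been assigned (or were already assigned) for
/// \p FI. Returns false if no unused VGPR is available, in which case the
/// partially allocated lanes are released and the SGPR has to be spilled some
/// other way (e.g. to scratch memory).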
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
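  // Illustrative example, assuming a wave size of 64: an 8-lane (32-byte)
  // spill that starts at NumVGPRSpillLanes == 60 places lanes 60-63 in the
  // current spill VGPR and lanes 0-3 in a newly allocated one.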
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

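/// Remove the stack objects that were created for SGPR spills now handled in
/// VGPR lanes; once the spills have been lowered to VGPR lane copies, their
/// frame slots are no longer needed.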
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}