//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

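// Maximum number of lanes in a wavefront, and therefore the maximum number of
// 32-bit SGPR spill lanes a single VGPR can provide.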
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    FrameOffsetReg(AMDGPU::NoRegister),
    StackPtrOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  // Non-entry functions have no special inputs for now.
  // TODO: Return early for non-entry CCs.

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_PS)
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  if (AMDGPU::isKernel(CC)) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

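// Each of the following helpers reserves the next user SGPRs for one of the
// preloaded kernel inputs and records which register tuple was assigned.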
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

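    // Starting a fresh VGPR: find an unused one to provide the next WaveSize
    // spill lanes.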
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

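// Once SGPR spills have been lowered to VGPR lanes, the stack objects that
// were created for them are no longer needed and can be removed.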
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}