//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
    : AMDGPUMachineFunction(MF),
      BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
      ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
      GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
      PrivateSegmentBuffer(false),
      DispatchPtr(false),
      QueuePtr(false),
      KernargSegmentPtr(false),
      DispatchID(false),
      FlatScratchInit(false),
      WorkGroupIDX(false),
      WorkGroupIDY(false),
      WorkGroupIDZ(false),
      WorkGroupInfo(false),
      LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false),
      WorkItemIDX(false),
      WorkItemIDY(false),
      WorkItemIDZ(false),
      ImplicitBufferPtr(false),
      ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff),
      HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

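// A cloned MachineFunction receives a copy-constructed SIMachineFunctionInfo.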
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
      = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
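  // Each VGPR already used for SGPR spilling provides WaveSize 4-byte lanes.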
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

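  // Each 4-byte slice of the spilled SGPRs occupies one lane of a spill VGPR.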
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

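    // A lane index of 0 means the current spill VGPRs are full; find another
    // unused VGPR to hold the next WaveSize lanes.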
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR, or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

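  // Spilling AGPRs requires VGPR scratch registers, and vice versa.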
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

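  // Assign each dword lane its own free scratch register, walking the register
  // class from the start and skipping registers that are already in use.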
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep the FP and BP
  // slots since their spills have not been inserted yet. Also remove the frame
  // indices from the SGPRToVGPRSpills map; otherwise a later pass such as
  // "stack slot coloring" could re-map the freed indices and cause subtle bugs.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

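  // Track whether any SGPR spill still has to go to memory rather than to a
  // VGPR lane.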
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
    MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
  assert(WWMReservedFrameIndexes.empty());

  WWMReservedFrameIndexes.resize(WWMReservedRegs.size());

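  // Create one spill slot per reserved WWM register, sized and aligned for its
  // register class.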
  int I = 0;
  for (Register VGPR : WWMReservedRegs) {
    const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
    WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
        TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
  }
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
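  // The register scavenger's spill slot: entry functions can use a fixed
  // object at offset 0, while other functions get an ordinary stack object.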
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  for (Register Reg : MFI.WWMReservedRegs)
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
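        // An inline asm constraint beginning with 'a' (after stripping a
        // leading '{') refers to an AGPR.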
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

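      // Conservatively assume that indirect calls and calls to non-intrinsic
      // functions may use AGPRs.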
      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

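  // Check whether any virtual register is constrained to an AGPR class.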
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

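  // Also check for direct uses of physical AGPRs.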
  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}