//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    HostcallPtr(false),
    HeapPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr"))
      HostcallPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-heap-ptr"))
      HeapPtr = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

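// Limit this function's occupancy to both the subtarget's maximum waves per
// EU and the occupancy achievable with the amount of LDS it uses.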
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

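// The add* helpers below reserve the next consecutive user SGPRs for a
// preloaded kernel argument, record the assignment in ArgInfo, and return the
// chosen register.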
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

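// Return true if \p Reg appears in the null-terminated callee-saved register
// list \p CSRegs.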
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function's frame, but keep FP & BP
  // since their spills haven't been inserted yet. Also remove the dead frame
  // indices from the `SGPRToVGPRSpills` map; otherwise a later pass such as
  // "stack slot coloring" could re-map the freed frame indices and cause
  // unexpected bugs.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

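// Lazily create and cache a stack slot for the register scavenger: a fixed
// object at offset 0 for entry functions, or an ordinary spill-sized stack
// object otherwise.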
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

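// User SGPRs are allocated consecutively starting at SGPR0; system SGPRs
// follow immediately after the user SGPRs.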
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

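// Print a register as a YAML string value using the target's register names.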
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

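// Convert the in-memory argument descriptors to their YAML representation,
// returning None if no argument is set.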
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

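// Conservatively check whether this function might need AGPRs: inline asm
// mentioning an 'a' register constraint, an indirect call, or a call to any
// non-intrinsic function counts as a potential use.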
bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

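// Determine (and cache) whether this function actually uses AGPRs. Non-entry
// calling conventions and functions that make calls are assumed to use them;
// otherwise scan for any virtual or physical AGPR use.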
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}