//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

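// Wave64 wavefront width; each SGPR spilled to a VGPR occupies one lane, so a
// single VGPR can hold up to MAX_LANES spilled SGPRs.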
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

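  // Initial occupancy estimate based on the function's LDS usage; it may be
  // lowered later via limitOccupancy().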
  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: We should have an analysis or something, rather than an attribute,
  // to detect calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other than the
      // registers required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

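  // For compute (non-graphics) functions, enable each remaining ABI input
  // unless the corresponding amdgpu-no-* attribute shows it is never used.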
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

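// Each of the add* helpers below claims the next user SGPRs for a preloaded
// kernel input; user SGPRs are assigned contiguously starting at SGPR0 (see
// getNextUserSGPR()).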
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
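  // The private segment buffer is a 128-bit resource descriptor, so it
  // occupies four user SGPRs.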
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized.
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
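  // Each VGPR reserved for SGPR spilling provides WaveSize lanes, and each
  // spilled SGPR consumes one lane.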
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

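  // Each 32-bit SGPR of the spilled register occupies one 4-byte lane of a
  // VGPR.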
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
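    // Lane index within the current spill VGPR; a fresh VGPR is needed each
    // time this wraps back to zero.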
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
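  // Assign lanes from the highest index down, scanning the register class
  // once for allocatable registers that are not already used.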
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also remove the frame indices
  // from the `SGPRToVGPRSpills` data structure; otherwise a later pass such as
  // stack slot coloring could re-map the freed frame indices, with unexpected
  // side effects.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
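  // The emergency scavenging slot is created lazily on first request and then
  // reused for the rest of the function.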
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

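// System SGPRs (work-group IDs, scratch wave offset, etc.) follow the user
// SGPRs in the wave's initial SGPR layout.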
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

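  // Convert a single ArgDescriptor into its YAML form; returns true if the
  // argument is present so the caller can tell whether anything was emitted.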
  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
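            // Both the "a" register-class constraint and an explicit register
            // like "{a0}" indicate a potential AGPR use.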
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

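  // Be conservative for callable functions and for functions that make calls:
  // a caller or callee may use AGPRs even if this function's body does not.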
  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}