//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           CC != CallingConv::AMDGPU_Gfx &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    if (UseFixedABI)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  }

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

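  // Several tuning values arrive as string-encoded integer function
  // attributes; parse whichever ones are present.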
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

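// Each of the add* routines below hands out the next unused user SGPRs for a
// preloaded kernel input and records the chosen register in ArgInfo. The
// private segment buffer is a 128-bit resource descriptor and takes four
// SGPRs; the remaining inputs are 64-bit values and take two SGPRs each.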
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
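  // Each spill VGPR stores one 32-bit SGPR value per lane, so a fully used
  // spill VGPR provides WaveSize slots.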
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

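  // SGPR spills are split into 32-bit words, one VGPR lane per word; e.g. a
  // 16-byte SGPR quad occupies four lanes.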
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a new VGPR when the spill lane index wraps around
    // (NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and one of two
    // conditions is true:
    // 1. There is no VGPR reserved by VGPRReservedForSGPRSpill to reuse.
    // 2. All spill lanes of the reserved VGPR(s) are full and another spill
    //    lane is required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
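  // Record the reservation without a backing stack slot; spill lanes in this
  // VGPR are handed out later by allocateSGPRSpillToVGPR().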
  SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

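  // Walk the register class in order, giving each lane the next register that
  // is allocatable, not already used in the function, and not set aside for
  // the calling convention or an earlier spill.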
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

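// Return the frame index reserved for register scavenging, creating and
// caching it on first use. Entry functions use a fixed object at offset 0;
// other functions allocate a regular stack object sized and aligned for an
// SGPR spill.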
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

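// User SGPRs are assigned contiguously starting at SGPR0; system SGPRs follow
// immediately after the last user SGPR.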
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

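// Convert the in-memory argument descriptors into their MIR YAML form.
// Returns None when no argument is set so the field can be omitted from the
// serialized output.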
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}