//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

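// Gather the function-level state the SI backend needs: which implicit kernel
// arguments (preloaded user/system SGPRs) are required, the registers used for
// scratch and stack access in non-entry functions, and attribute-driven values
// such as the GIT pointer high bits and the GDS size.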
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now; the only additional
    // registers they need are the ones required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects)
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

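// Tighten the cached occupancy estimate using both the wave limit and the
// occupancy achievable with this function's LDS (local memory) usage.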
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

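// Each of the add* routines below reserves the next user SGPRs for a preloaded
// kernel argument, records the assignment in ArgInfo, and returns the first
// register of the allocated tuple (four SGPRs for the private segment buffer,
// two for each 64-bit pointer or value).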
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

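// Return true if \p Reg appears in the null-terminated callee-saved register
// list \p CSRegs.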
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed spill lanes are available in VGPRs already used
/// for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..)
    // when one of two conditions is true:
    // 1. A VGPR reserved via VGPRReservedForSGPRSpill is available and its
    //    spill lanes are not yet exhausted.
    // 2. All spill lanes of the already-reserved VGPR(s) are full and another
    //    spill lane is required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs.
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

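  // Walk the register class in order, assigning to each lane the first
  // register that is allocatable, not already used in the function, and not
  // already claimed for spilling or callee saving.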
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

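// Drop stack objects that are no longer needed because the corresponding
// spills have been fully mapped to register lanes; only the FP and BP save
// indices are kept, since their spill code has not been inserted yet.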
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

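// User SGPRs are allocated consecutively starting at SGPR0; system SGPRs are
// placed immediately after the last user SGPR.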
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

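// Return the SGPR that carries the low half of the GIT address on amdpal;
// merged shader stages on gfx9+ receive it in s8 rather than s0.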
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

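// Render a register as its printable name so it can be emitted into MIR YAML.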
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

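// Convert the in-memory argument descriptors to their MIR YAML form. Returns
// None when no arguments are set, so the field is omitted from the output.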
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

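// Build the YAML-serializable mirror of SIMachineFunctionInfo used when
// printing MIR.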
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
}

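// Forward to the YAML MappingTraits so the generic MIR printer and parser can
// map the fields of this structure.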
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

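// Initialize the target-independent fields from parsed MIR YAML. Follows the
// MIR parser convention of returning true on error; this step cannot fail.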
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  return false;
}

// Remove the VGPR that was reserved for SGPR spills if there are no spilled
// SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *I = SpillVGPRs.begin(); I != SpillVGPRs.end(); ++I) {
    if (I->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(I);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}