//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
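  // For example, with the maximum budget of 256 VGPRs this reserves v255.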
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Returns true if \p NumNeed free lanes are available in VGPRs already used
/// for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
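  // Each VGPR reserved for SGPR spilling provides WaveSize 32-bit lanes, so
  // compare the running lane count plus the request against the total capacity
  // of the already-reserved spill VGPRs.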
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

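  // Each spilled SGPR is 32 bits wide and occupies one lane of a spill VGPR,
  // so a frame object of Size bytes needs Size / 4 lanes.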
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
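  // For example, on wave64 a 16-dword spill starting at lane 56 fills lanes
  // 56-63 of the current spill VGPR and lanes 0-7 of a newly allocated one.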
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

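  // Each 32-bit slice of the spilled value is assigned its own register of the
  // opposite class: AGPR slices are spilled to VGPRs and VGPR slices to AGPRs.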
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

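  // Walk the candidate registers once, assigning the next allocatable and
  // otherwise-unused one to each lane that still needs a register.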
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also remove those frame indices
  // from the `SGPRToVGPRSpills` map; otherwise a later pass such as "stack slot
  // coloring" could re-map the freed indices and cause unexpected bugs.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
    MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
  assert(WWMReservedFrameIndexes.empty());

  WWMReservedFrameIndexes.resize(WWMReservedRegs.size());

  int I = 0;
  for (Register VGPR : WWMReservedRegs) {
    const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
    WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
        TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
  }
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

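// The helpers below assume user SGPRs are allocated contiguously starting at
// SGPR0, with system SGPRs placed immediately after the last user SGPR.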
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  for (Register Reg : MFI.WWMReservedRegs)
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
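            // AGPR constraints are spelled "a" or "{aN}"; strip the brace so
            // both forms are recognized.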
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}
716