//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

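// Initialize per-function state from the subtarget, calling convention, and
// function attributes: which preloaded SGPR/VGPR inputs are needed, the
// stack-related registers for non-entry functions, and the initial occupancy.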
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy = AMDGPU::VGPR_32RegClass.getRegister(32);
  }
}

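/// Clamp the occupancy to the tighter of the waves-per-EU limit and the
/// occupancy achievable with the current LDS usage.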
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

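// The add*() helpers below reserve the next user SGPRs for a preloaded kernel
// input and record the assignment in ArgInfo: four SGPRs (an SGPR_128 tuple)
// for the private segment buffer, and an SGPR pair for each 64-bit pointer or
// ID input.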
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

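/// \returns true if \p Reg appears in the null-terminated list of callee-saved
/// registers \p CSRegs.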
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed lanes are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

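  // Each 4-byte slice of the spilled SGPR occupies one lane of a VGPR, so an
  // N-byte spill needs N/4 lanes (e.g. an SGPR_128 spill needs four lanes).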
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so the VGPR must always be saved,
      // even if it is a caller-saved register.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// \returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

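  // Walk the register class once, assigning lanes from the highest index down
  // and skipping any register that is already allocated or otherwise used.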
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

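/// Remove the stack objects left behind by SGPR spills that were lowered to
/// VGPR lanes. \returns true if any SGPR spill slot remains on the default
/// stack, i.e. some SGPRs still have to be spilled to memory.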
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep the FP and BP
  // ones since their spills haven't been inserted yet. Also erase those frame
  // indices from the SGPRToVGPRSpills map; otherwise a later pass such as
  // "stack slot coloring" could re-map the freed indices and leave stale
  // entries behind.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

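// Create a spill stack slot for each WWM-reserved register so its contents can
// be saved and restored.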
void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
    MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
  assert(WWMReservedFrameIndexes.empty());

  WWMReservedFrameIndexes.resize(WWMReservedRegs.size());

  int I = 0;
  for (Register VGPR : WWMReservedRegs) {
    const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
    WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
        TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
  }
}

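// Lazily create, and then cache, the frame index used by the register
// scavenger. Entry functions use a fixed object at offset 0; other functions
// get an ordinary stack object.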
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

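// User SGPRs are allocated contiguously starting at SGPR0; system SGPRs are
// allocated immediately after them.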
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

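/// Print \p Reg in its MIR form for YAML serialization.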
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

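/// Convert the in-memory argument descriptors to their YAML representation.
/// \returns None if no argument is present.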
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  for (Register Reg : MFI.WWMReservedRegs)
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

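// Copy the fields parsed from MIR YAML back into the in-memory function info.
// Returns true (and fills Error/SourceRange) if the serialized scavenge frame
// index cannot be resolved.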
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

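/// Conservatively determine whether the function may need AGPRs: any inline
/// asm operand with an 'a' constraint, or any call to something that is not a
/// known intrinsic, forces the answer to be true.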
bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

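/// \returns true if the function actually uses AGPRs, caching the result in
/// UsesAGPRs once it is known to be final.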
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}
714