//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// An indirect call is assumed to be able to target any function in the
/// module except hardware entry points. Therefore the register usage of a
/// function containing indirect calls is estimated as the maximum register
/// usage over all non-entry-point functions in the module.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
// non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

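// Return the function a call's callee operand refers to, looking through
// global aliases. A null callee (immediate 0 operand) yields nullptr.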
static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}

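// Return true if \p Reg has any use that is not an implicit operand of a FLAT
// instruction.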
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

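// The total SGPR count is the explicitly used SGPRs plus the extra SGPRs the
// hardware reserves for VCC, flat scratch, and XNACK.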
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}

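// On targets with GFX90A insts, AGPRs are allocated from the same register
// file as VGPRs: the AGPR block starts at a 4-register aligned offset after
// the VGPRs, so the two counts are summed rather than taking the maximum.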
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  if (ST.hasGFX90AInsts() && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}

bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = static_cast<const GCNTargetMachine *>(&TPC->getTM<TargetMachine>());

  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineModuleInfo &MMI =
        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
    MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);

    auto CI = CallGraphResourceInfo.insert(
        std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(MF);
  }

  return false;
}

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }

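  // There are calls, so scan every register operand of every instruction to
  // find the highest register index used in each register file, and fold in
  // the callees' usage at each call site.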
  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

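        // Classify the register by its register class to determine which
        // register file it belongs to and how many 32-bit registers it covers.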
        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          llvm_unreachable("Unknown register class");
        }
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize =
              std::max(CalleeFrameSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          const SIFunctionResourceInfo &WorstCase =
              getWorstCaseResourceInfo(*MF.getFunction().getParent());
          MaxSGPR = std::max(WorstCase.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(WorstCase.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(WorstCase.NumAGPR - 1, MaxAGPR);

          // The callee of an indirect call is unknown, so conservatively
          // assume it needs VCC and a dynamically sized stack.
          Info.UsesVCC = true;
          Info.UsesFlatScratch |=
              WorstCase.UsesFlatScratch && ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

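/// Return the worst-case resource info assumed for the unknown target of an
/// indirect call, computing and caching it on first use.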
const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &
AMDGPUResourceUsageAnalysis::getWorstCaseResourceInfo(const Module &M) {
  if (ModuleWorstCaseInfo)
    return *ModuleWorstCaseInfo;

  computeWorstCaseModuleRegisterUsage(M);
  return *ModuleWorstCaseInfo;
}

/// Find the worst-case register usage for all callable functions in the
/// module, assuming all reachable functions are defined in the current module.
void AMDGPUResourceUsageAnalysis::computeWorstCaseModuleRegisterUsage(
    const Module &M) {
  assert(!ModuleWorstCaseInfo);
  ModuleWorstCaseInfo = SIFunctionResourceInfo();
  ModuleWorstCaseInfo->UsesVCC = true;
  ModuleWorstCaseInfo->HasDynamicallySizedStack = true;
  ModuleWorstCaseInfo->HasRecursion = true;
  ModuleWorstCaseInfo->HasIndirectCall = true;

  for (const Function &F : M) {
    if (F.isIntrinsic())
      continue;

    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
      continue;

    const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
    const int32_t MaxVGPR = ST.getMaxNumVGPRs(F);
    const int32_t MaxSGPR = ST.getMaxNumSGPRs(F);

    ModuleWorstCaseInfo->NumVGPR =
        std::max(ModuleWorstCaseInfo->NumVGPR, MaxVGPR);

    if (ST.hasMAIInsts()) {
      const int32_t MaxAGPR = ST.getMaxNumAGPRs(F);
      ModuleWorstCaseInfo->NumAGPR =
          std::max(ModuleWorstCaseInfo->NumAGPR, MaxAGPR);
    }

    ModuleWorstCaseInfo->NumExplicitSGPR =
        std::max(ModuleWorstCaseInfo->NumExplicitSGPR, MaxSGPR);

    ModuleWorstCaseInfo->UsesFlatScratch |= ST.hasFlatAddressSpace();
  }
}