//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// It is assumed that an indirect call can go into any function except
/// hardware-entrypoints. Therefore the register usage of functions with
/// indirect calls is estimated as the maximum of all non-entrypoint functions
/// in the module.
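///
/// For example, if the non-entrypoint functions in a module need at most 20,
/// 40 and 60 VGPRs, any function containing an indirect call is assumed to
/// need 60 VGPRs.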
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
// non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

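// Return the statically known callee of a call, looking through a single
// level of global alias. An immediate callee operand (always 0) encodes an
// unknown callee and yields nullptr.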
static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}

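// Return true if Reg has any use that is not an implicit operand of a FLAT
// instruction.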
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

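// The total SGPR count includes the extra SGPRs implicitly reserved for VCC,
// flat scratch and XNACK on targets where they apply.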
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}

bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  bool HasIndirectCall = false;

  CallGraph CG = CallGraph(M);
  auto End = po_end(&CG);

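  // Walk the call graph in post-order so that callees are analyzed before
  // their callers; a caller can then simply take the maximum over the
  // already-computed resource info of its callees.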
  for (auto IT = po_begin(&CG); IT != End; ++IT) {
    Function *F = IT->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");

    auto CI = CallGraphResourceInfo.insert(
        std::make_pair(F, SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(*MF, TM);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

  return false;
}

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, const TargetMachine &TM) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

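  // Realigning the stack may require up to the maximum stack alignment in
  // extra scratch space, so account for it in the private segment size.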
  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }

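  // The function makes calls: walk every instruction, track the highest
  // register index used in each register file, and merge in callee usage at
  // call sites.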
  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::SGPR_NULL64:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          llvm_unreachable("Unknown register class");
        }
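        // The operand occupies Width consecutive hardware registers starting
        // at HWReg; remember the highest index touched per register file.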
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

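      // At call sites, fold in the callee's resource usage if it is already
      // known; otherwise fall back to conservative assumptions for indirect
      // or external callees.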
      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize =
              std::max(CalleeFrameSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

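  // MaxSGPR/MaxVGPR/MaxAGPR hold the highest register index used (or -1 if
  // none); add one to convert them into register counts.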
  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
  // Collect the maximum number of registers from non-hardware-entrypoints.
  // All these functions are potential targets for indirect calls.
  int32_t NonKernelMaxSGPRs = 0;
  int32_t NonKernelMaxVGPRs = 0;
  int32_t NonKernelMaxAGPRs = 0;

  for (const auto &I : CallGraphResourceInfo) {
    if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
      auto &Info = I.getSecond();
      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
      NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
    }
  }

  // Add register usage for functions with indirect calls.
  // For calls to unknown functions, we assume the maximum register usage of
  // all non-hardware-entrypoints in the current module.
  for (auto &I : CallGraphResourceInfo) {
    auto &Info = I.getSecond();
    if (Info.HasIndirectCall) {
      Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
      Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
      Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
    }
  }
}