//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// It is assumed that an indirect call can go into any function except
/// hardware-entrypoints. Therefore the register usage of functions with
/// indirect calls is estimated as the maximum of all non-entrypoint functions
/// in the module.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
// non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

// Find the Function referenced by a call pseudo's callee operand, looking
// through aliases. An immediate operand of 0 encodes an unknown (indirect)
// callee.
static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}

// Return true if \p Reg has any use that is not an implicit operand of a FLAT
// instruction.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  // GFX90A uses a combined VGPR/AGPR register file; AGPRs are allocated after
  // the VGPRs, starting at a 4-register aligned offset.
  if (ST.hasGFX90AInsts() && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}

bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = static_cast<const GCNTargetMachine *>(&TPC->getTM<TargetMachine>());

  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineModuleInfo &MMI =
        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
    MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);

    auto CI = CallGraphResourceInfo.insert(
        std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(MF);
  }

  return false;
}

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(const MachineFunction &MF) {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }

  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
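      // Conservatively track the highest hardware register index touched by
      // any explicit or implicit register operand. Width is the number of
      // 32-bit registers covered by the operand's register class, so an
      // operand starting at hardware index HWReg raises the corresponding
      // watermark to HWReg + Width - 1.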
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          llvm_unreachable("Unknown register class");
        }
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize = std::max(
              CalleeFrameSize,
              static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          const SIFunctionResourceInfo &WorstCase =
              getWorstCaseResourceInfo(*MF.getFunction().getParent());
          MaxSGPR = std::max(WorstCase.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(WorstCase.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(WorstCase.NumAGPR - 1, MaxAGPR);

          // Conservatively assume the indirect call uses VCC and, if the
          // target has a flat address space, flat scratch.
          Info.UsesVCC = true;
          Info.UsesFlatScratch |=
              WorstCase.UsesFlatScratch && ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &
AMDGPUResourceUsageAnalysis::getWorstCaseResourceInfo(const Module &M) {
  if (ModuleWorstCaseInfo)
    return *ModuleWorstCaseInfo;

  computeWorstCaseModuleRegisterUsage(M);
  return *ModuleWorstCaseInfo;
}

/// Find the worst case register usage for all callable functions in the
/// module, assuming all reachable functions are defined in the current module.
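/// Intrinsics and entry-point functions (kernels and other hardware
/// entrypoints) are skipped, since they cannot be the target of an indirect
/// call. The result is cached in ModuleWorstCaseInfo and returned by
/// getWorstCaseResourceInfo().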
void AMDGPUResourceUsageAnalysis::computeWorstCaseModuleRegisterUsage(
    const Module &M) {
  assert(!ModuleWorstCaseInfo);
  ModuleWorstCaseInfo = SIFunctionResourceInfo();
  ModuleWorstCaseInfo->UsesVCC = true;
  ModuleWorstCaseInfo->HasDynamicallySizedStack = true;
  ModuleWorstCaseInfo->HasRecursion = true;
  ModuleWorstCaseInfo->HasIndirectCall = true;

  for (const Function &F : M) {
    if (F.isIntrinsic())
      continue;

    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
      continue;

    const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
    const int32_t MaxVGPR = ST.getMaxNumVGPRs(F);
    const int32_t MaxSGPR = ST.getMaxNumSGPRs(F);

    ModuleWorstCaseInfo->NumVGPR =
        std::max(ModuleWorstCaseInfo->NumVGPR, MaxVGPR);

    if (ST.hasMAIInsts()) {
      const int32_t MaxAGPR = ST.getMaxNumAGPRs(F);
      ModuleWorstCaseInfo->NumAGPR =
          std::max(ModuleWorstCaseInfo->NumAGPR, MaxAGPR);
    }

    ModuleWorstCaseInfo->NumExplicitSGPR =
        std::max(ModuleWorstCaseInfo->NumExplicitSGPR, MaxSGPR);

    ModuleWorstCaseInfo->UsesFlatScratch |= ST.hasFlatAddressSpace();
  }
}