//===- AMDGPUResourceUsageAnalysis.cpp -- analysis of resources -----------===//
22b08f6afSSebastian Neubauer //
32b08f6afSSebastian Neubauer // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42b08f6afSSebastian Neubauer // See https://llvm.org/LICENSE.txt for license information.
52b08f6afSSebastian Neubauer // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
62b08f6afSSebastian Neubauer //
72b08f6afSSebastian Neubauer //===----------------------------------------------------------------------===//
82b08f6afSSebastian Neubauer //
92b08f6afSSebastian Neubauer /// \file
102b08f6afSSebastian Neubauer /// \brief Analyzes how many registers and other resources are used by
112b08f6afSSebastian Neubauer /// functions.
122b08f6afSSebastian Neubauer ///
132b08f6afSSebastian Neubauer /// The results of this analysis are used to fill the register usage, flat
142b08f6afSSebastian Neubauer /// usage, etc. into hardware registers.
152b08f6afSSebastian Neubauer ///
162b08f6afSSebastian Neubauer /// The analysis takes callees into account. E.g. if a function A that needs 10
172b08f6afSSebastian Neubauer /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
182b08f6afSSebastian Neubauer /// will return 20.
192b08f6afSSebastian Neubauer /// It is assumed that an indirect call can go into any function except
202b08f6afSSebastian Neubauer /// hardware-entrypoints. Therefore the register usage of functions with
212b08f6afSSebastian Neubauer /// indirect calls is estimated as the maximum of all non-entrypoint functions
222b08f6afSSebastian Neubauer /// in the module.
232b08f6afSSebastian Neubauer ///
242b08f6afSSebastian Neubauer //===----------------------------------------------------------------------===//
252b08f6afSSebastian Neubauer 
262b08f6afSSebastian Neubauer #include "AMDGPUResourceUsageAnalysis.h"
272b08f6afSSebastian Neubauer #include "AMDGPU.h"
282b08f6afSSebastian Neubauer #include "GCNSubtarget.h"
292b08f6afSSebastian Neubauer #include "SIMachineFunctionInfo.h"
30814a0abcSJacob Weightman #include "llvm/ADT/PostOrderIterator.h"
312b08f6afSSebastian Neubauer #include "llvm/Analysis/CallGraph.h"
32989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFrameInfo.h"
332b08f6afSSebastian Neubauer #include "llvm/CodeGen/TargetPassConfig.h"
340567f033SAnshil Gandhi #include "llvm/IR/GlobalAlias.h"
350567f033SAnshil Gandhi #include "llvm/IR/GlobalValue.h"
362b08f6afSSebastian Neubauer #include "llvm/Target/TargetMachine.h"
372b08f6afSSebastian Neubauer 
382b08f6afSSebastian Neubauer using namespace llvm;
392b08f6afSSebastian Neubauer using namespace llvm::AMDGPU;
402b08f6afSSebastian Neubauer 
412b08f6afSSebastian Neubauer #define DEBUG_TYPE "amdgpu-resource-usage"
422b08f6afSSebastian Neubauer 
432b08f6afSSebastian Neubauer char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
442b08f6afSSebastian Neubauer char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
452b08f6afSSebastian Neubauer 
462b08f6afSSebastian Neubauer // We need to tell the runtime some amount ahead of time if we don't know the
472b08f6afSSebastian Neubauer // true stack size. Assume a smaller number if this is only due to dynamic /
482b08f6afSSebastian Neubauer // non-entry block allocas.
492b08f6afSSebastian Neubauer static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
502b08f6afSSebastian Neubauer     "amdgpu-assume-external-call-stack-size",
512b08f6afSSebastian Neubauer     cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
522b08f6afSSebastian Neubauer     cl::init(16384));
532b08f6afSSebastian Neubauer 
542b08f6afSSebastian Neubauer static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
552b08f6afSSebastian Neubauer     "amdgpu-assume-dynamic-stack-object-size",
562b08f6afSSebastian Neubauer     cl::desc("Assumed extra stack use if there are any "
572b08f6afSSebastian Neubauer              "variable sized objects (in bytes)"),
582b08f6afSSebastian Neubauer     cl::Hidden, cl::init(4096));
592b08f6afSSebastian Neubauer 
602b08f6afSSebastian Neubauer INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
612b08f6afSSebastian Neubauer                 "Function register usage analysis", true, true)
622b08f6afSSebastian Neubauer 
getCalleeFunction(const MachineOperand & Op)632b08f6afSSebastian Neubauer static const Function *getCalleeFunction(const MachineOperand &Op) {
642b08f6afSSebastian Neubauer   if (Op.isImm()) {
652b08f6afSSebastian Neubauer     assert(Op.getImm() == 0);
662b08f6afSSebastian Neubauer     return nullptr;
672b08f6afSSebastian Neubauer   }
680567f033SAnshil Gandhi   if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
690567f033SAnshil Gandhi     return cast<Function>(GA->getOperand(0));
702b08f6afSSebastian Neubauer   return cast<Function>(Op.getGlobal());
712b08f6afSSebastian Neubauer }
722b08f6afSSebastian Neubauer 
hasAnyNonFlatUseOfReg(const MachineRegisterInfo & MRI,const SIInstrInfo & TII,unsigned Reg)732b08f6afSSebastian Neubauer static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
742b08f6afSSebastian Neubauer                                   const SIInstrInfo &TII, unsigned Reg) {
752b08f6afSSebastian Neubauer   for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
762b08f6afSSebastian Neubauer     if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
772b08f6afSSebastian Neubauer       return true;
782b08f6afSSebastian Neubauer   }
792b08f6afSSebastian Neubauer 
802b08f6afSSebastian Neubauer   return false;
812b08f6afSSebastian Neubauer }
822b08f6afSSebastian Neubauer 
getTotalNumSGPRs(const GCNSubtarget & ST) const832b08f6afSSebastian Neubauer int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
842b08f6afSSebastian Neubauer     const GCNSubtarget &ST) const {
852b08f6afSSebastian Neubauer   return NumExplicitSGPR +
862b08f6afSSebastian Neubauer          IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
872b08f6afSSebastian Neubauer                                    ST.getTargetID().isXnackOnOrAny());
882b08f6afSSebastian Neubauer }
892b08f6afSSebastian Neubauer 
getTotalNumVGPRs(const GCNSubtarget & ST,int32_t ArgNumAGPR,int32_t ArgNumVGPR) const902b08f6afSSebastian Neubauer int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
9169f7d81dSDavid Stuttard     const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
920bad7cb5SJacob Lambert   return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
9369f7d81dSDavid Stuttard }
9469f7d81dSDavid Stuttard 
getTotalNumVGPRs(const GCNSubtarget & ST) const9569f7d81dSDavid Stuttard int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
962b08f6afSSebastian Neubauer     const GCNSubtarget &ST) const {
9769f7d81dSDavid Stuttard   return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
982b08f6afSSebastian Neubauer }
992b08f6afSSebastian Neubauer 
runOnModule(Module & M)1004622afa9SMatt Arsenault bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
1012b08f6afSSebastian Neubauer   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1022b08f6afSSebastian Neubauer   if (!TPC)
1032b08f6afSSebastian Neubauer     return false;
1042b08f6afSSebastian Neubauer 
1054622afa9SMatt Arsenault   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1064622afa9SMatt Arsenault   const TargetMachine &TM = TPC->getTM<TargetMachine>();
1074622afa9SMatt Arsenault   bool HasIndirectCall = false;
1082b08f6afSSebastian Neubauer 
109814a0abcSJacob Weightman   CallGraph CG = CallGraph(M);
110814a0abcSJacob Weightman   auto End = po_end(&CG);
111814a0abcSJacob Weightman 
112814a0abcSJacob Weightman   for (auto IT = po_begin(&CG); IT != End; ++IT) {
113814a0abcSJacob Weightman     Function *F = IT->getFunction();
114814a0abcSJacob Weightman     if (!F || F->isDeclaration())
1152b08f6afSSebastian Neubauer       continue;
1162b08f6afSSebastian Neubauer 
117814a0abcSJacob Weightman     MachineFunction *MF = MMI.getMachineFunction(*F);
1184622afa9SMatt Arsenault     assert(MF && "function must have been generated already");
1192b08f6afSSebastian Neubauer 
1202b08f6afSSebastian Neubauer     auto CI = CallGraphResourceInfo.insert(
121814a0abcSJacob Weightman         std::make_pair(F, SIFunctionResourceInfo()));
1222b08f6afSSebastian Neubauer     SIFunctionResourceInfo &Info = CI.first->second;
1232b08f6afSSebastian Neubauer     assert(CI.second && "should only be called once per function");
1244622afa9SMatt Arsenault     Info = analyzeResourceUsage(*MF, TM);
1254622afa9SMatt Arsenault     HasIndirectCall |= Info.HasIndirectCall;
1262b08f6afSSebastian Neubauer   }
1272b08f6afSSebastian Neubauer 
1284622afa9SMatt Arsenault   if (HasIndirectCall)
1294622afa9SMatt Arsenault     propagateIndirectCallRegisterUsage();
1304622afa9SMatt Arsenault 
1312b08f6afSSebastian Neubauer   return false;
1322b08f6afSSebastian Neubauer }
1332b08f6afSSebastian Neubauer 
1342b08f6afSSebastian Neubauer AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
analyzeResourceUsage(const MachineFunction & MF,const TargetMachine & TM) const1354622afa9SMatt Arsenault AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
1364622afa9SMatt Arsenault     const MachineFunction &MF, const TargetMachine &TM) const {
1372b08f6afSSebastian Neubauer   SIFunctionResourceInfo Info;
1382b08f6afSSebastian Neubauer 
1392b08f6afSSebastian Neubauer   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1402b08f6afSSebastian Neubauer   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1412b08f6afSSebastian Neubauer   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1422b08f6afSSebastian Neubauer   const MachineRegisterInfo &MRI = MF.getRegInfo();
1432b08f6afSSebastian Neubauer   const SIInstrInfo *TII = ST.getInstrInfo();
1442b08f6afSSebastian Neubauer   const SIRegisterInfo &TRI = TII->getRegisterInfo();
1452b08f6afSSebastian Neubauer 
1462b08f6afSSebastian Neubauer   Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
1472b08f6afSSebastian Neubauer                          MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
1482b08f6afSSebastian Neubauer                          MRI.isLiveIn(MFI->getPreloadedReg(
1492b08f6afSSebastian Neubauer                              AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));
1502b08f6afSSebastian Neubauer 
1512b08f6afSSebastian Neubauer   // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
1522b08f6afSSebastian Neubauer   // instructions aren't used to access the scratch buffer. Inline assembly may
1532b08f6afSSebastian Neubauer   // need it though.
1542b08f6afSSebastian Neubauer   //
1552b08f6afSSebastian Neubauer   // If we only have implicit uses of flat_scr on flat instructions, it is not
1562b08f6afSSebastian Neubauer   // really needed.
1572b08f6afSSebastian Neubauer   if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
1582b08f6afSSebastian Neubauer       (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
1592b08f6afSSebastian Neubauer        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
1602b08f6afSSebastian Neubauer        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
1612b08f6afSSebastian Neubauer     Info.UsesFlatScratch = false;
1622b08f6afSSebastian Neubauer   }
1632b08f6afSSebastian Neubauer 
1642b08f6afSSebastian Neubauer   Info.PrivateSegmentSize = FrameInfo.getStackSize();
1652b08f6afSSebastian Neubauer 
1662b08f6afSSebastian Neubauer   // Assume a big number if there are any unknown sized objects.
1672b08f6afSSebastian Neubauer   Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
1682b08f6afSSebastian Neubauer   if (Info.HasDynamicallySizedStack)
1692b08f6afSSebastian Neubauer     Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
1702b08f6afSSebastian Neubauer 
1712b08f6afSSebastian Neubauer   if (MFI->isStackRealigned())
1722b08f6afSSebastian Neubauer     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
1732b08f6afSSebastian Neubauer 
1742b08f6afSSebastian Neubauer   Info.UsesVCC =
1752b08f6afSSebastian Neubauer       MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
1762b08f6afSSebastian Neubauer 
1772b08f6afSSebastian Neubauer   // If there are no calls, MachineRegisterInfo can tell us the used register
1782b08f6afSSebastian Neubauer   // count easily.
1792b08f6afSSebastian Neubauer   // A tail call isn't considered a call for MachineFrameInfo's purposes.
1802b08f6afSSebastian Neubauer   if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
1812b08f6afSSebastian Neubauer     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
1822b08f6afSSebastian Neubauer     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
1832b08f6afSSebastian Neubauer       if (MRI.isPhysRegUsed(Reg)) {
1842b08f6afSSebastian Neubauer         HighestVGPRReg = Reg;
1852b08f6afSSebastian Neubauer         break;
1862b08f6afSSebastian Neubauer       }
1872b08f6afSSebastian Neubauer     }
1882b08f6afSSebastian Neubauer 
1892b08f6afSSebastian Neubauer     if (ST.hasMAIInsts()) {
1902b08f6afSSebastian Neubauer       MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
1912b08f6afSSebastian Neubauer       for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
1922b08f6afSSebastian Neubauer         if (MRI.isPhysRegUsed(Reg)) {
1932b08f6afSSebastian Neubauer           HighestAGPRReg = Reg;
1942b08f6afSSebastian Neubauer           break;
1952b08f6afSSebastian Neubauer         }
1962b08f6afSSebastian Neubauer       }
1972b08f6afSSebastian Neubauer       Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
1982b08f6afSSebastian Neubauer                          ? 0
1992b08f6afSSebastian Neubauer                          : TRI.getHWRegIndex(HighestAGPRReg) + 1;
2002b08f6afSSebastian Neubauer     }
2012b08f6afSSebastian Neubauer 
2022b08f6afSSebastian Neubauer     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
2032b08f6afSSebastian Neubauer     for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
2042b08f6afSSebastian Neubauer       if (MRI.isPhysRegUsed(Reg)) {
2052b08f6afSSebastian Neubauer         HighestSGPRReg = Reg;
2062b08f6afSSebastian Neubauer         break;
2072b08f6afSSebastian Neubauer       }
2082b08f6afSSebastian Neubauer     }
2092b08f6afSSebastian Neubauer 
2102b08f6afSSebastian Neubauer     // We found the maximum register index. They start at 0, so add one to get
2112b08f6afSSebastian Neubauer     // the number of registers.
2122b08f6afSSebastian Neubauer     Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
2132b08f6afSSebastian Neubauer                        ? 0
2142b08f6afSSebastian Neubauer                        : TRI.getHWRegIndex(HighestVGPRReg) + 1;
2152b08f6afSSebastian Neubauer     Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
2162b08f6afSSebastian Neubauer                                ? 0
2172b08f6afSSebastian Neubauer                                : TRI.getHWRegIndex(HighestSGPRReg) + 1;
2182b08f6afSSebastian Neubauer 
2192b08f6afSSebastian Neubauer     return Info;
2202b08f6afSSebastian Neubauer   }
2212b08f6afSSebastian Neubauer 
2222b08f6afSSebastian Neubauer   int32_t MaxVGPR = -1;
2232b08f6afSSebastian Neubauer   int32_t MaxAGPR = -1;
2242b08f6afSSebastian Neubauer   int32_t MaxSGPR = -1;
2252b08f6afSSebastian Neubauer   uint64_t CalleeFrameSize = 0;
2262b08f6afSSebastian Neubauer 
2272b08f6afSSebastian Neubauer   for (const MachineBasicBlock &MBB : MF) {
2282b08f6afSSebastian Neubauer     for (const MachineInstr &MI : MBB) {
2292b08f6afSSebastian Neubauer       // TODO: Check regmasks? Do they occur anywhere except calls?
2302b08f6afSSebastian Neubauer       for (const MachineOperand &MO : MI.operands()) {
2312b08f6afSSebastian Neubauer         unsigned Width = 0;
2322b08f6afSSebastian Neubauer         bool IsSGPR = false;
2332b08f6afSSebastian Neubauer         bool IsAGPR = false;
2342b08f6afSSebastian Neubauer 
2352b08f6afSSebastian Neubauer         if (!MO.isReg())
2362b08f6afSSebastian Neubauer           continue;
2372b08f6afSSebastian Neubauer 
2382b08f6afSSebastian Neubauer         Register Reg = MO.getReg();
2392b08f6afSSebastian Neubauer         switch (Reg) {
2402b08f6afSSebastian Neubauer         case AMDGPU::EXEC:
2412b08f6afSSebastian Neubauer         case AMDGPU::EXEC_LO:
2422b08f6afSSebastian Neubauer         case AMDGPU::EXEC_HI:
2432b08f6afSSebastian Neubauer         case AMDGPU::SCC:
2442b08f6afSSebastian Neubauer         case AMDGPU::M0:
2452b08f6afSSebastian Neubauer         case AMDGPU::M0_LO16:
2462b08f6afSSebastian Neubauer         case AMDGPU::M0_HI16:
2472b08f6afSSebastian Neubauer         case AMDGPU::SRC_SHARED_BASE:
2482b08f6afSSebastian Neubauer         case AMDGPU::SRC_SHARED_LIMIT:
2492b08f6afSSebastian Neubauer         case AMDGPU::SRC_PRIVATE_BASE:
2502b08f6afSSebastian Neubauer         case AMDGPU::SRC_PRIVATE_LIMIT:
2512b08f6afSSebastian Neubauer         case AMDGPU::SGPR_NULL:
252*cb9ae937SStanislav Mekhanoshin         case AMDGPU::SGPR_NULL64:
2532b08f6afSSebastian Neubauer         case AMDGPU::MODE:
2542b08f6afSSebastian Neubauer           continue;
2552b08f6afSSebastian Neubauer 
2562b08f6afSSebastian Neubauer         case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2572b08f6afSSebastian Neubauer           llvm_unreachable("src_pops_exiting_wave_id should not be used");
2582b08f6afSSebastian Neubauer 
2592b08f6afSSebastian Neubauer         case AMDGPU::NoRegister:
2602b08f6afSSebastian Neubauer           assert(MI.isDebugInstr() &&
2612b08f6afSSebastian Neubauer                  "Instruction uses invalid noreg register");
2622b08f6afSSebastian Neubauer           continue;
2632b08f6afSSebastian Neubauer 
2642b08f6afSSebastian Neubauer         case AMDGPU::VCC:
2652b08f6afSSebastian Neubauer         case AMDGPU::VCC_LO:
2662b08f6afSSebastian Neubauer         case AMDGPU::VCC_HI:
2672b08f6afSSebastian Neubauer         case AMDGPU::VCC_LO_LO16:
2682b08f6afSSebastian Neubauer         case AMDGPU::VCC_LO_HI16:
2692b08f6afSSebastian Neubauer         case AMDGPU::VCC_HI_LO16:
2702b08f6afSSebastian Neubauer         case AMDGPU::VCC_HI_HI16:
2712b08f6afSSebastian Neubauer           Info.UsesVCC = true;
2722b08f6afSSebastian Neubauer           continue;
2732b08f6afSSebastian Neubauer 
2742b08f6afSSebastian Neubauer         case AMDGPU::FLAT_SCR:
2752b08f6afSSebastian Neubauer         case AMDGPU::FLAT_SCR_LO:
2762b08f6afSSebastian Neubauer         case AMDGPU::FLAT_SCR_HI:
2772b08f6afSSebastian Neubauer           continue;
2782b08f6afSSebastian Neubauer 
2792b08f6afSSebastian Neubauer         case AMDGPU::XNACK_MASK:
2802b08f6afSSebastian Neubauer         case AMDGPU::XNACK_MASK_LO:
2812b08f6afSSebastian Neubauer         case AMDGPU::XNACK_MASK_HI:
2822b08f6afSSebastian Neubauer           llvm_unreachable("xnack_mask registers should not be used");
2832b08f6afSSebastian Neubauer 
2842b08f6afSSebastian Neubauer         case AMDGPU::LDS_DIRECT:
2852b08f6afSSebastian Neubauer           llvm_unreachable("lds_direct register should not be used");
2862b08f6afSSebastian Neubauer 
2872b08f6afSSebastian Neubauer         case AMDGPU::TBA:
2882b08f6afSSebastian Neubauer         case AMDGPU::TBA_LO:
2892b08f6afSSebastian Neubauer         case AMDGPU::TBA_HI:
2902b08f6afSSebastian Neubauer         case AMDGPU::TMA:
2912b08f6afSSebastian Neubauer         case AMDGPU::TMA_LO:
2922b08f6afSSebastian Neubauer         case AMDGPU::TMA_HI:
2932b08f6afSSebastian Neubauer           llvm_unreachable("trap handler registers should not be used");
2942b08f6afSSebastian Neubauer 
2952b08f6afSSebastian Neubauer         case AMDGPU::SRC_VCCZ:
2962b08f6afSSebastian Neubauer           llvm_unreachable("src_vccz register should not be used");
2972b08f6afSSebastian Neubauer 
2982b08f6afSSebastian Neubauer         case AMDGPU::SRC_EXECZ:
2992b08f6afSSebastian Neubauer           llvm_unreachable("src_execz register should not be used");
3002b08f6afSSebastian Neubauer 
3012b08f6afSSebastian Neubauer         case AMDGPU::SRC_SCC:
3022b08f6afSSebastian Neubauer           llvm_unreachable("src_scc register should not be used");
3032b08f6afSSebastian Neubauer 
3042b08f6afSSebastian Neubauer         default:
3052b08f6afSSebastian Neubauer           break;
3062b08f6afSSebastian Neubauer         }
3072b08f6afSSebastian Neubauer 
3082b08f6afSSebastian Neubauer         if (AMDGPU::SReg_32RegClass.contains(Reg) ||
3092b08f6afSSebastian Neubauer             AMDGPU::SReg_LO16RegClass.contains(Reg) ||
3102b08f6afSSebastian Neubauer             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
3112b08f6afSSebastian Neubauer           assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
3122b08f6afSSebastian Neubauer                  "trap handler registers should not be used");
3132b08f6afSSebastian Neubauer           IsSGPR = true;
3142b08f6afSSebastian Neubauer           Width = 1;
3152b08f6afSSebastian Neubauer         } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
3162b08f6afSSebastian Neubauer                    AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
3172b08f6afSSebastian Neubauer                    AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
3182b08f6afSSebastian Neubauer           IsSGPR = false;
3192b08f6afSSebastian Neubauer           Width = 1;
3202b08f6afSSebastian Neubauer         } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
3212b08f6afSSebastian Neubauer                    AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
3222b08f6afSSebastian Neubauer           IsSGPR = false;
3232b08f6afSSebastian Neubauer           IsAGPR = true;
3242b08f6afSSebastian Neubauer           Width = 1;
3252b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
3262b08f6afSSebastian Neubauer           assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
3272b08f6afSSebastian Neubauer                  "trap handler registers should not be used");
3282b08f6afSSebastian Neubauer           IsSGPR = true;
3292b08f6afSSebastian Neubauer           Width = 2;
3302b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
3312b08f6afSSebastian Neubauer           IsSGPR = false;
3322b08f6afSSebastian Neubauer           Width = 2;
3332b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
3342b08f6afSSebastian Neubauer           IsSGPR = false;
3352b08f6afSSebastian Neubauer           IsAGPR = true;
3362b08f6afSSebastian Neubauer           Width = 2;
3372b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
3382b08f6afSSebastian Neubauer           IsSGPR = false;
3392b08f6afSSebastian Neubauer           Width = 3;
3402b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
3412b08f6afSSebastian Neubauer           IsSGPR = true;
3422b08f6afSSebastian Neubauer           Width = 3;
3432b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
3442b08f6afSSebastian Neubauer           IsSGPR = false;
3452b08f6afSSebastian Neubauer           IsAGPR = true;
3462b08f6afSSebastian Neubauer           Width = 3;
3472b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
3482b08f6afSSebastian Neubauer           assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
3492b08f6afSSebastian Neubauer                  "trap handler registers should not be used");
3502b08f6afSSebastian Neubauer           IsSGPR = true;
3512b08f6afSSebastian Neubauer           Width = 4;
3522b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
3532b08f6afSSebastian Neubauer           IsSGPR = false;
3542b08f6afSSebastian Neubauer           Width = 4;
3552b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
3562b08f6afSSebastian Neubauer           IsSGPR = false;
3572b08f6afSSebastian Neubauer           IsAGPR = true;
3582b08f6afSSebastian Neubauer           Width = 4;
3592b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
3602b08f6afSSebastian Neubauer           IsSGPR = false;
3612b08f6afSSebastian Neubauer           Width = 5;
3622b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
3632b08f6afSSebastian Neubauer           IsSGPR = true;
3642b08f6afSSebastian Neubauer           Width = 5;
3652b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
3662b08f6afSSebastian Neubauer           IsSGPR = false;
3672b08f6afSSebastian Neubauer           IsAGPR = true;
3682b08f6afSSebastian Neubauer           Width = 5;
3692b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
3702b08f6afSSebastian Neubauer           IsSGPR = false;
3712b08f6afSSebastian Neubauer           Width = 6;
3722b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
3732b08f6afSSebastian Neubauer           IsSGPR = true;
3742b08f6afSSebastian Neubauer           Width = 6;
3752b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
3762b08f6afSSebastian Neubauer           IsSGPR = false;
3772b08f6afSSebastian Neubauer           IsAGPR = true;
3782b08f6afSSebastian Neubauer           Width = 6;
3792b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
3802b08f6afSSebastian Neubauer           IsSGPR = false;
3812b08f6afSSebastian Neubauer           Width = 7;
3822b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
3832b08f6afSSebastian Neubauer           IsSGPR = true;
3842b08f6afSSebastian Neubauer           Width = 7;
3852b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
3862b08f6afSSebastian Neubauer           IsSGPR = false;
3872b08f6afSSebastian Neubauer           IsAGPR = true;
3882b08f6afSSebastian Neubauer           Width = 7;
3892b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
3902b08f6afSSebastian Neubauer           assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
3912b08f6afSSebastian Neubauer                  "trap handler registers should not be used");
3922b08f6afSSebastian Neubauer           IsSGPR = true;
3932b08f6afSSebastian Neubauer           Width = 8;
3942b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
3952b08f6afSSebastian Neubauer           IsSGPR = false;
3962b08f6afSSebastian Neubauer           Width = 8;
3972b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
3982b08f6afSSebastian Neubauer           IsSGPR = false;
3992b08f6afSSebastian Neubauer           IsAGPR = true;
4002b08f6afSSebastian Neubauer           Width = 8;
4012b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
4022b08f6afSSebastian Neubauer           assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
4032b08f6afSSebastian Neubauer                  "trap handler registers should not be used");
4042b08f6afSSebastian Neubauer           IsSGPR = true;
4052b08f6afSSebastian Neubauer           Width = 16;
4062b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
4072b08f6afSSebastian Neubauer           IsSGPR = false;
4082b08f6afSSebastian Neubauer           Width = 16;
4092b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
4102b08f6afSSebastian Neubauer           IsSGPR = false;
4112b08f6afSSebastian Neubauer           IsAGPR = true;
4122b08f6afSSebastian Neubauer           Width = 16;
4132b08f6afSSebastian Neubauer         } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
4142b08f6afSSebastian Neubauer           IsSGPR = true;
4152b08f6afSSebastian Neubauer           Width = 32;
4162b08f6afSSebastian Neubauer         } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
4172b08f6afSSebastian Neubauer           IsSGPR = false;
4182b08f6afSSebastian Neubauer           Width = 32;
4192b08f6afSSebastian Neubauer         } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
4202b08f6afSSebastian Neubauer           IsSGPR = false;
4212b08f6afSSebastian Neubauer           IsAGPR = true;
4222b08f6afSSebastian Neubauer           Width = 32;
4232b08f6afSSebastian Neubauer         } else {
4242b08f6afSSebastian Neubauer           llvm_unreachable("Unknown register class");
4252b08f6afSSebastian Neubauer         }
4262b08f6afSSebastian Neubauer         unsigned HWReg = TRI.getHWRegIndex(Reg);
4272b08f6afSSebastian Neubauer         int MaxUsed = HWReg + Width - 1;
4282b08f6afSSebastian Neubauer         if (IsSGPR) {
4292b08f6afSSebastian Neubauer           MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
4302b08f6afSSebastian Neubauer         } else if (IsAGPR) {
4312b08f6afSSebastian Neubauer           MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
4322b08f6afSSebastian Neubauer         } else {
4332b08f6afSSebastian Neubauer           MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
4342b08f6afSSebastian Neubauer         }
4352b08f6afSSebastian Neubauer       }
4362b08f6afSSebastian Neubauer 
4372b08f6afSSebastian Neubauer       if (MI.isCall()) {
4382b08f6afSSebastian Neubauer         // Pseudo used just to encode the underlying global. Is there a better
4392b08f6afSSebastian Neubauer         // way to track this?
4402b08f6afSSebastian Neubauer 
4412b08f6afSSebastian Neubauer         const MachineOperand *CalleeOp =
4422b08f6afSSebastian Neubauer             TII->getNamedOperand(MI, AMDGPU::OpName::callee);
4432b08f6afSSebastian Neubauer 
4442b08f6afSSebastian Neubauer         const Function *Callee = getCalleeFunction(*CalleeOp);
4452b08f6afSSebastian Neubauer         DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
4462b08f6afSSebastian Neubauer             CallGraphResourceInfo.end();
4472b08f6afSSebastian Neubauer 
4482b08f6afSSebastian Neubauer         // Avoid crashing on undefined behavior with an illegal call to a
4492b08f6afSSebastian Neubauer         // kernel. If a callsite's calling convention doesn't match the
4502b08f6afSSebastian Neubauer         // function's, it's undefined behavior. If the callsite calling
4512b08f6afSSebastian Neubauer         // convention does match, that would have errored earlier.
4522b08f6afSSebastian Neubauer         if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
4532b08f6afSSebastian Neubauer           report_fatal_error("invalid call to entry function");
4542b08f6afSSebastian Neubauer 
4552b08f6afSSebastian Neubauer         bool IsIndirect = !Callee || Callee->isDeclaration();
4562b08f6afSSebastian Neubauer         if (!IsIndirect)
4572b08f6afSSebastian Neubauer           I = CallGraphResourceInfo.find(Callee);
4582b08f6afSSebastian Neubauer 
459c7a0c2d0SMatt Arsenault         // FIXME: Call site could have norecurse on it
460c7a0c2d0SMatt Arsenault         if (!Callee || !Callee->doesNotRecurse()) {
461c7a0c2d0SMatt Arsenault           Info.HasRecursion = true;
462c7a0c2d0SMatt Arsenault 
463c7a0c2d0SMatt Arsenault           // TODO: If we happen to know there is no stack usage in the
464c7a0c2d0SMatt Arsenault           // callgraph, we don't need to assume an infinitely growing stack.
465c7a0c2d0SMatt Arsenault           if (!MI.isReturn()) {
466c7a0c2d0SMatt Arsenault             // We don't need to assume an unknown stack size for tail calls.
467c7a0c2d0SMatt Arsenault 
468c7a0c2d0SMatt Arsenault             // FIXME: This only benefits in the case where the kernel does not
469c7a0c2d0SMatt Arsenault             // directly call the tail called function. If a kernel directly
470c7a0c2d0SMatt Arsenault             // calls a tail recursive function, we'll assume maximum stack size
471c7a0c2d0SMatt Arsenault             // based on the regular call instruction.
472c7a0c2d0SMatt Arsenault             CalleeFrameSize =
473c7a0c2d0SMatt Arsenault               std::max(CalleeFrameSize,
474c7a0c2d0SMatt Arsenault                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
475c7a0c2d0SMatt Arsenault           }
476c7a0c2d0SMatt Arsenault         }
477c7a0c2d0SMatt Arsenault 
4782b08f6afSSebastian Neubauer         if (IsIndirect || I == CallGraphResourceInfo.end()) {
4792b08f6afSSebastian Neubauer           CalleeFrameSize =
4802b08f6afSSebastian Neubauer               std::max(CalleeFrameSize,
4812b08f6afSSebastian Neubauer                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
4822b08f6afSSebastian Neubauer 
4832b08f6afSSebastian Neubauer           // Register usage of indirect calls gets handled later
4842b08f6afSSebastian Neubauer           Info.UsesVCC = true;
4854622afa9SMatt Arsenault           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
4862b08f6afSSebastian Neubauer           Info.HasDynamicallySizedStack = true;
4872b08f6afSSebastian Neubauer           Info.HasIndirectCall = true;
4882b08f6afSSebastian Neubauer         } else {
4892b08f6afSSebastian Neubauer           // We force CodeGen to run in SCC order, so the callee's register
4902b08f6afSSebastian Neubauer           // usage etc. should be the cumulative usage of all callees.
4912b08f6afSSebastian Neubauer           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
4922b08f6afSSebastian Neubauer           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
4932b08f6afSSebastian Neubauer           MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
4942b08f6afSSebastian Neubauer           CalleeFrameSize =
4952b08f6afSSebastian Neubauer               std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
4962b08f6afSSebastian Neubauer           Info.UsesVCC |= I->second.UsesVCC;
4972b08f6afSSebastian Neubauer           Info.UsesFlatScratch |= I->second.UsesFlatScratch;
4982b08f6afSSebastian Neubauer           Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
4992b08f6afSSebastian Neubauer           Info.HasRecursion |= I->second.HasRecursion;
5002b08f6afSSebastian Neubauer           Info.HasIndirectCall |= I->second.HasIndirectCall;
5012b08f6afSSebastian Neubauer         }
5022b08f6afSSebastian Neubauer       }
5032b08f6afSSebastian Neubauer     }
5042b08f6afSSebastian Neubauer   }
5052b08f6afSSebastian Neubauer 
5062b08f6afSSebastian Neubauer   Info.NumExplicitSGPR = MaxSGPR + 1;
5072b08f6afSSebastian Neubauer   Info.NumVGPR = MaxVGPR + 1;
5082b08f6afSSebastian Neubauer   Info.NumAGPR = MaxAGPR + 1;
5092b08f6afSSebastian Neubauer   Info.PrivateSegmentSize += CalleeFrameSize;
5102b08f6afSSebastian Neubauer 
5112b08f6afSSebastian Neubauer   return Info;
5122b08f6afSSebastian Neubauer }
5132b08f6afSSebastian Neubauer 
propagateIndirectCallRegisterUsage()5144622afa9SMatt Arsenault void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
5154622afa9SMatt Arsenault   // Collect the maximum number of registers from non-hardware-entrypoints.
5164622afa9SMatt Arsenault   // All these functions are potential targets for indirect calls.
5174622afa9SMatt Arsenault   int32_t NonKernelMaxSGPRs = 0;
5184622afa9SMatt Arsenault   int32_t NonKernelMaxVGPRs = 0;
5194622afa9SMatt Arsenault   int32_t NonKernelMaxAGPRs = 0;
5202b08f6afSSebastian Neubauer 
5214622afa9SMatt Arsenault   for (const auto &I : CallGraphResourceInfo) {
5224622afa9SMatt Arsenault     if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
5234622afa9SMatt Arsenault       auto &Info = I.getSecond();
5244622afa9SMatt Arsenault       NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
5254622afa9SMatt Arsenault       NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
5264622afa9SMatt Arsenault       NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
5274622afa9SMatt Arsenault     }
5282b08f6afSSebastian Neubauer   }
5292b08f6afSSebastian Neubauer 
5304622afa9SMatt Arsenault   // Add register usage for functions with indirect calls.
5314622afa9SMatt Arsenault   // For calls to unknown functions, we assume the maximum register usage of
5324622afa9SMatt Arsenault   // all non-hardware-entrypoints in the current module.
5334622afa9SMatt Arsenault   for (auto &I : CallGraphResourceInfo) {
5344622afa9SMatt Arsenault     auto &Info = I.getSecond();
5354622afa9SMatt Arsenault     if (Info.HasIndirectCall) {
5364622afa9SMatt Arsenault       Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
5374622afa9SMatt Arsenault       Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
5384622afa9SMatt Arsenault       Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
5392b08f6afSSebastian Neubauer     }
5402b08f6afSSebastian Neubauer   }
5412b08f6afSSebastian Neubauer }
542