//===- SIMachineFunctionInfo.cpp - SI Machine Function Info --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
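    // These are the ABI-fixed locations callable functions assume: the
    // 128-bit scratch resource descriptor in SGPR0-3, followed by the
    // scratch wave offset, frame offset, and stack pointer registers.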
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
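    // Flat scratch init supplies the base of the scratch backing memory,
    // which is used to program the flat scratch register so flat memory
    // instructions can reach private (scratch) addresses.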
    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
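  // (operator[] default-constructs an empty lane list on first lookup, so a
  // non-empty list means a previous call already assigned lanes for FI.)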
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span two VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(
    MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}

/// \returns VGPR used for \p Dim's work item ID.
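/// For entry functions these are the first three VGPRs: x in VGPR0, y in
/// VGPR1, and z in VGPR2, matching the initial VGPR layout at wave launch.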
unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
  switch (Dim) {
  case 0:
    assert(hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case 1:
    assert(hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case 2:
    assert(hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected dimension");
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}