//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    FrameOffsetReg(AMDGPU::NoRegister),
    StackPtrOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;
    return;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
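    // The debugger prologue stores the work-group and work-item IDs out to
    // scratch, so all of them need to be available even if the kernel itself
    // never reads them.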
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();

  if (HasStackObjects || MaySpill) {
    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
  }

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
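  // The flat scratch init user SGPRs carry the values used to set up the
  // FLAT_SCRATCH register pair, which flat instructions need in order to
  // address scratch (private) memory.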
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return ImplicitBufferPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
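        // Drop the partially built mapping for this frame index and roll the
        // global lane counter back to where it was before this allocation.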
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}