//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    FrameOffsetReg(AMDGPU::NoRegister),
    StackPtrOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    return;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
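    // The debugger prologue is expected to store the work group and work item
    // IDs to the stack slots tracked by the Debugger*StackObjectIndices fields
    // above, so every ID is requested regardless of the function's attributes.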
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();

  if (HasStackObjects || MaySpill) {
    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
  }

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
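  // FlatScratchInit presumably provides the preloaded SGPR pair used to set up
  // FLAT_SCRATCH for scratch access through flat instructions, so it is only
  // requested for targets with a flat address space running on the HSA OS.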
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
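        // Rolling back NumVGPRSpillLanes releases the lanes claimed for this
        // frame index so a later spill can reuse them.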
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid the machine
      // verifier complaining about uses of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}