//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    FrameOffsetReg(AMDGPU::NoRegister),
    StackPtrOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  // Non-entry functions have no special inputs for now.
  // TODO: Return early for non-entry CCs.

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_PS)
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  if (AMDGPU::isKernel(CC)) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill) {
    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
  }

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(
  const SIRegisterInfo &TRI) {
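  // As with the other 64-bit pointer inputs above, this reserves an aligned
  // pair of consecutive user SGPRs. It backs PrivateMemoryInputPtr, which the
  // constructor enables for Mesa GFX shaders that have stack objects or may
  // spill.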
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}