//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
    ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
    FrameOffsetReg(AMDGPU::FP_REG),
    StackPtrOffsetReg(AMDGPU::SP_REG),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    WorkItemIDXVGPR(AMDGPU::NoRegister),
    WorkItemIDYVGPR(AMDGPU::NoRegister),
    WorkItemIDZVGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    // FIXME: Not really a system SGPR.
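    // Reuse the fixed scratch wave offset register chosen above as the
    // wave's private segment byte offset input.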
    PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    KernargSegmentPtr = !F->arg_empty();
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
    }
  }

  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return ImplicitBufferPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
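        // Roll back the partial allocation: drop this frame index's entry
        // and undo the spill lanes counted so far before reporting failure.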
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}