//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PrivateMemoryPtrUserSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  // Non-entry functions have no special inputs for now.
  // TODO: Return early for non-entry CCs.

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_PS)
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  if (AMDGPU::isKernel(CC)) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (ST.debuggerEmitPrologue()) {
    // The debugger prologue needs every work-group and work-item ID, so
    // enable them all.
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI, where we must use flat for global, we should be able to
  // omit this if flat scratch is never used for generic access.
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}

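// The add* helpers below hand out preloaded user SGPRs back to back. A
// minimal sketch of the packing they produce, assuming getNextUserSGPR()
// yields AMDGPU::SGPR0 + NumUserSGPRs (illustrative values only):
//
//   addPrivateSegmentBuffer(TRI); // reserves s[0:3], NumUserSGPRs == 4
//   addDispatchPtr(TRI);          // reserves s[4:5], NumUserSGPRs == 6
//   addKernargSegmentPtr(TRI);    // reserves s[6:7], NumUserSGPRs == 8
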
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

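// A worked example of the lane bookkeeping below (illustrative numbers,
// assuming a 64-lane wavefront): a 16-byte SGPR spill needs Size / 4 == 4
// lanes. If NumVGPRSpillLanes is 62, lanes 62 and 63 of the current spill
// VGPR are used, then VGPRIndex wraps to 0, a fresh VGPR is allocated, and
// its lanes 0 and 1 hold the rest, so one wide spill may straddle two VGPRs.
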
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span two VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaints about the use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}

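// A minimal sketch of the intended call pattern (hypothetical caller; the
// actual driver for this logic lives in the SGPR spill lowering code):
//
//   for (int FI : SGPRSpillFrameIndices)
//     if (MFI->allocateSGPRSpillToVGPR(MF, FI)) {
//       // Rewrite the spill/restore to V_WRITELANE_B32 / V_READLANE_B32
//       // on the reserved lanes.
//     }
//   // Once every SGPR spill has been rewritten, the stack slots are dead:
//   MFI->removeSGPRToVGPRFrameIndices(MF.getFrameInfo());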