//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    FrameOffsetReg(AMDGPU::NoRegister),
    StackPtrOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  // Non-entry functions have no special inputs for now.
  // TODO: Return early for non-entry CCs.

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_PS)
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  if (AMDGPU::isKernel(CC)) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
    FlatScratchInit = true;
}
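
// Each of the add* helpers below reserves the next unallocated user SGPRs for
// one preloaded kernel argument: it forms the wide super-register whose sub0
// is getNextUserSGPR(), advances NumUserSGPRs by the argument's size in
// dwords, and returns the resulting register to the caller. A rough sketch of
// how a caller (e.g. argument lowering in SIISelLowering) is expected to use
// it; the exact register class and surrounding code are illustrative only:
//
//   unsigned Reg = Info->addKernargSegmentPtr(*TRI);
//   MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
//   CCInfo.AllocateReg(Reg);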
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}
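
// SGPR-to-VGPR spilling packs each 32-bit slice of a spilled SGPR (or SGPR
// tuple) into one lane of a spill VGPR: a single VGPR provides WavefrontSize
// lanes, so a fresh unused VGPR only has to be found once the current one is
// full. A rough sketch of the intended use (the actual spill lowering lives in
// SIRegisterInfo; getSGPRToVGPRSpills is assumed to be the accessor declared
// in SIMachineFunctionInfo.h):
//
//   if (MFI->allocateSGPRSpillToVGPR(MF, FI))
//     for (const SpilledReg &Spill : MFI->getSGPRToVGPRSpills(FI)) {
//       // Emit a V_WRITELANE_B32 / V_READLANE_B32 addressing lane Spill.Lane
//       // of Spill.VGPR for each dword of the spilled SGPR.
//     }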
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}