//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PrivateMemoryPtrUserSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  // Compute kernels (non-shader calling conventions) always receive the
  // kernarg segment pointer and the X dimension IDs; shaders opt in through
  // the function attributes checked below.
  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
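  // (For example, a kernel that reads only the Z work-item ID still gets the
  // Y ID enabled, because the hardware can only enable a prefix of X, Y, Z.)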
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);
}

// Each of the following reserves the next available user SGPRs for a
// preloaded input: the private segment buffer resource descriptor takes an
// SGPR quad (SReg_128), while each pointer input takes an SGPR pair
// (SReg_64).
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
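  // (SGPRToVGPRSpills[FI] default-constructs an empty lane vector the first
  // time a frame index is seen, so a non-empty vector means the lanes were
  // assigned by an earlier call.)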
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs: e.g. with a 64-lane wave, a 4-SGPR spill starting at lane 62 puts
  // two lanes in the current VGPR and two in a freshly allocated one.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we won't
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      SpillVGPRs.push_back(LaneVGPR);

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back();
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}