//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    MaximumWorkGroupSize(0),
    DebuggerReservedVGPRCount(0),
    DebuggerWorkGroupIDStackObjectIndices{0, 0, 0},
    DebuggerWorkItemIDStackObjectIndices{0, 0, 0},
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    HasFlatInstructions(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}

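// Each of the add* helpers below reserves user SGPRs for one preloaded kernel
// input: it forms a register of the required width starting at the next free
// user SGPR, records it, and advances NumUserSGPRs by the input's size in
// dwords.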
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  MachineFunction *MF,
  unsigned FrameIndex,
  unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

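    // Remember which VGPR backs this group of 64 lanes so that subsequent
    // spill slots mapping to the same group reuse it.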
    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid the machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
  const MachineFunction &MF) const {
  return MaximumWorkGroupSize;
}