//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    MaximumWorkGroupSize(0),
    DebuggerReservedVGPRCount(0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    HasFlatInstructions(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  MachineFunction *MF, unsigned FrameIndex, unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  // Each lane VGPR provides MAX_LANES (64) 4-byte spill slots, one per lane.
  // Map the byte offset to a lane VGPR index and a lane within that VGPR.
  unsigned LaneVGPRIdx = Offset / (MAX_LANES * 4);
  unsigned Lane = (Offset / 4) % MAX_LANES;

  SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
  const MachineFunction &MF) const {
  return MaximumWorkGroupSize;
}