//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
                                                            MachineFunction *MF,
                                                            unsigned FrameIndex,
                                                            unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  // Each spill VGPR provides MAX_LANES (64) 4-byte slots, one per lane.
  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
                                                *MF);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}