//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    MaximumWorkGroupSize(0),
    DebuggerReservedVGPRCount(0),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    HasFlatInstructions(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}

// Each add* helper reserves the next user SGPRs in allocation order and
// records the resulting super-register.
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(
  const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

// Map a spill frame index (plus a 32-bit dword offset, SubIdx) to the VGPR
// lane that holds the spilled SGPR value.
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  MachineFunction *MF,
  unsigned FrameIndex,
  unsigned SubIdx) {
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
    MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  // Each lane VGPR holds MAX_LANES (64) 4-byte SGPR values, one per lane.
  unsigned LaneVGPRIdx = Offset / (4 * MAX_LANES);
  unsigned Lane = (Offset / 4) % MAX_LANES;

  SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineBasicBlock &BB : *MF)
      BB.addLiveIn(LaneVGPR);
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
  const MachineFunction &MF) const {
  return MaximumWorkGroupSize;
}