1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //===----------------------------------------------------------------------===// 10 11 12 #include "SIMachineFunctionInfo.h" 13 #include "AMDGPUSubtarget.h" 14 #include "SIInstrInfo.h" 15 #include "llvm/CodeGen/MachineInstrBuilder.h" 16 #include "llvm/CodeGen/MachineFrameInfo.h" 17 #include "llvm/CodeGen/MachineRegisterInfo.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/LLVMContext.h" 20 21 #define MAX_LANES 64 22 23 using namespace llvm; 24 25 26 // Pin the vtable to this file. 27 void SIMachineFunctionInfo::anchor() {} 28 29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 30 : AMDGPUMachineFunction(MF), 31 TIDReg(AMDGPU::NoRegister), 32 ScratchRSrcReg(AMDGPU::NoRegister), 33 ScratchWaveOffsetReg(AMDGPU::NoRegister), 34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), 35 DispatchPtrUserSGPR(AMDGPU::NoRegister), 36 QueuePtrUserSGPR(AMDGPU::NoRegister), 37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), 38 DispatchIDUserSGPR(AMDGPU::NoRegister), 39 FlatScratchInitUserSGPR(AMDGPU::NoRegister), 40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), 41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), 42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), 43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), 44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), 45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), 46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), 47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), 48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), 49 PSInputAddr(0), 50 ReturnsVoid(true), 51 LDSWaveSpillSize(0), 52 PSInputEna(0), 53 NumUserSGPRs(0), 54 NumSystemSGPRs(0), 55 HasSpilledSGPRs(false), 56 HasSpilledVGPRs(false), 57 HasNonSpillStackObjects(false), 58 HasFlatInstructions(false), 59 PrivateSegmentBuffer(false), 60 DispatchPtr(false), 61 QueuePtr(false), 62 DispatchID(false), 63 KernargSegmentPtr(false), 64 FlatScratchInit(false), 65 GridWorkgroupCountX(false), 66 GridWorkgroupCountY(false), 67 GridWorkgroupCountZ(false), 68 WorkGroupIDX(true), 69 WorkGroupIDY(false), 70 WorkGroupIDZ(false), 71 WorkGroupInfo(false), 72 PrivateSegmentWaveByteOffset(false), 73 WorkItemIDX(true), 74 WorkItemIDY(false), 75 WorkItemIDZ(false) { 76 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 77 const Function *F = MF.getFunction(); 78 79 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); 80 81 const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 82 83 if (getShaderType() == ShaderType::COMPUTE) 84 KernargSegmentPtr = true; 85 86 if (F->hasFnAttribute("amdgpu-work-group-id-y")) 87 WorkGroupIDY = true; 88 89 if (F->hasFnAttribute("amdgpu-work-group-id-z")) 90 WorkGroupIDZ = true; 91 92 if (F->hasFnAttribute("amdgpu-work-item-id-y")) 93 WorkItemIDY = true; 94 95 if (F->hasFnAttribute("amdgpu-work-item-id-z")) 96 WorkItemIDZ = true; 97 98 // X, XY, and XYZ are the only supported combinations, so make sure Y is 99 // enabled if Z is. 100 if (WorkItemIDZ) 101 WorkItemIDY = true; 102 103 bool MaySpill = ST.isVGPRSpillingEnabled(this); 104 bool HasStackObjects = FrameInfo->hasStackObjects(); 105 106 if (HasStackObjects || MaySpill) 107 PrivateSegmentWaveByteOffset = true; 108 109 if (ST.isAmdHsaOS()) { 110 if (HasStackObjects || MaySpill) 111 PrivateSegmentBuffer = true; 112 113 if (F->hasFnAttribute("amdgpu-dispatch-ptr")) 114 DispatchPtr = true; 115 } 116 117 // We don't need to worry about accessing spills with flat instructions. 118 // TODO: On VI where we must use flat for global, we should be able to omit 119 // this if it is never used for generic access. 120 if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS && 121 ST.isAmdHsaOS()) 122 FlatScratchInit = true; 123 } 124 125 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( 126 const SIRegisterInfo &TRI) { 127 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( 128 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); 129 NumUserSGPRs += 4; 130 return PrivateSegmentBufferUserSGPR; 131 } 132 133 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 134 DispatchPtrUserSGPR = TRI.getMatchingSuperReg( 135 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 136 NumUserSGPRs += 2; 137 return DispatchPtrUserSGPR; 138 } 139 140 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 141 QueuePtrUserSGPR = TRI.getMatchingSuperReg( 142 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 143 NumUserSGPRs += 2; 144 return QueuePtrUserSGPR; 145 } 146 147 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 148 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( 149 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 150 NumUserSGPRs += 2; 151 return KernargSegmentPtrUserSGPR; 152 } 153 154 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { 155 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg( 156 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 157 NumUserSGPRs += 2; 158 return FlatScratchInitUserSGPR; 159 } 160 161 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( 162 MachineFunction *MF, 163 unsigned FrameIndex, 164 unsigned SubIdx) { 165 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 166 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 167 MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo()); 168 MachineRegisterInfo &MRI = MF->getRegInfo(); 169 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex); 170 Offset += SubIdx * 4; 171 172 unsigned LaneVGPRIdx = Offset / (64 * 4); 173 unsigned Lane = (Offset / 4) % 64; 174 175 struct SpilledReg Spill; 176 Spill.Lane = Lane; 177 178 if (!LaneVGPRs.count(LaneVGPRIdx)) { 179 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); 180 181 if (LaneVGPR == AMDGPU::NoRegister) 182 // We have no VGPRs left for spilling SGPRs. 183 return Spill; 184 185 186 LaneVGPRs[LaneVGPRIdx] = LaneVGPR; 187 188 // Add this register as live-in to all blocks to avoid machine verifer 189 // complaining about use of an undefined physical register. 190 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); 191 BI != BE; ++BI) { 192 BI->addLiveIn(LaneVGPR); 193 } 194 } 195 196 Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; 197 return Spill; 198 } 199 200 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize( 201 const MachineFunction &MF) const { 202 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 203 // FIXME: We should get this information from kernel attributes if it 204 // is available. 205 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize(); 206 } 207