1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //===----------------------------------------------------------------------===// 10 11 12 #include "SIMachineFunctionInfo.h" 13 #include "AMDGPUSubtarget.h" 14 #include "SIInstrInfo.h" 15 #include "llvm/CodeGen/MachineInstrBuilder.h" 16 #include "llvm/CodeGen/MachineFrameInfo.h" 17 #include "llvm/CodeGen/MachineRegisterInfo.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/LLVMContext.h" 20 21 #define MAX_LANES 64 22 23 using namespace llvm; 24 25 26 // Pin the vtable to this file. 27 void SIMachineFunctionInfo::anchor() {} 28 29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 30 : AMDGPUMachineFunction(MF), 31 TIDReg(AMDGPU::NoRegister), 32 ScratchRSrcReg(AMDGPU::NoRegister), 33 ScratchWaveOffsetReg(AMDGPU::NoRegister), 34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), 35 DispatchPtrUserSGPR(AMDGPU::NoRegister), 36 QueuePtrUserSGPR(AMDGPU::NoRegister), 37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), 38 DispatchIDUserSGPR(AMDGPU::NoRegister), 39 FlatScratchInitUserSGPR(AMDGPU::NoRegister), 40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), 41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), 42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), 43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), 44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), 45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), 46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), 47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), 48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), 49 PSInputAddr(0), 50 ReturnsVoid(true), 51 LDSWaveSpillSize(0), 52 PSInputEna(0), 53 NumUserSGPRs(0), 54 NumSystemSGPRs(0), 55 HasSpilledSGPRs(false), 56 HasSpilledVGPRs(false), 57 PrivateSegmentBuffer(false), 58 DispatchPtr(false), 59 QueuePtr(false), 60 DispatchID(false), 61 KernargSegmentPtr(false), 62 FlatScratchInit(false), 63 GridWorkgroupCountX(false), 64 GridWorkgroupCountY(false), 65 GridWorkgroupCountZ(false), 66 WorkGroupIDX(true), 67 WorkGroupIDY(false), 68 WorkGroupIDZ(false), 69 WorkGroupInfo(false), 70 PrivateSegmentWaveByteOffset(false), 71 WorkItemIDX(true), 72 WorkItemIDY(false), 73 WorkItemIDZ(false) { 74 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 75 const Function *F = MF.getFunction(); 76 77 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); 78 79 const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 80 81 if (getShaderType() == ShaderType::COMPUTE) 82 KernargSegmentPtr = true; 83 84 if (F->hasFnAttribute("amdgpu-work-group-id-y")) 85 WorkGroupIDY = true; 86 87 if (F->hasFnAttribute("amdgpu-work-group-id-z")) 88 WorkGroupIDZ = true; 89 90 if (F->hasFnAttribute("amdgpu-work-item-id-y")) 91 WorkItemIDY = true; 92 93 if (F->hasFnAttribute("amdgpu-work-item-id-z")) 94 WorkItemIDZ = true; 95 96 bool MaySpill = ST.isVGPRSpillingEnabled(this); 97 bool HasStackObjects = FrameInfo->hasStackObjects(); 98 99 if (HasStackObjects || MaySpill) 100 PrivateSegmentWaveByteOffset = true; 101 102 if (ST.isAmdHsaOS()) { 103 if (HasStackObjects || MaySpill) 104 PrivateSegmentBuffer = true; 105 106 if (F->hasFnAttribute("amdgpu-dispatch-ptr")) 107 DispatchPtr = true; 108 } 109 110 // X, XY, and XYZ are the only supported combinations, so make sure Y is 111 // enabled if Z is. 112 if (WorkItemIDZ) 113 WorkItemIDY = true; 114 } 115 116 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( 117 const SIRegisterInfo &TRI) { 118 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( 119 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); 120 NumUserSGPRs += 4; 121 return PrivateSegmentBufferUserSGPR; 122 } 123 124 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 125 DispatchPtrUserSGPR = TRI.getMatchingSuperReg( 126 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 127 NumUserSGPRs += 2; 128 return DispatchPtrUserSGPR; 129 } 130 131 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 132 QueuePtrUserSGPR = TRI.getMatchingSuperReg( 133 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 134 NumUserSGPRs += 2; 135 return QueuePtrUserSGPR; 136 } 137 138 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 139 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( 140 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 141 NumUserSGPRs += 2; 142 return KernargSegmentPtrUserSGPR; 143 } 144 145 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( 146 MachineFunction *MF, 147 unsigned FrameIndex, 148 unsigned SubIdx) { 149 const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 150 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 151 MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo()); 152 MachineRegisterInfo &MRI = MF->getRegInfo(); 153 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex); 154 Offset += SubIdx * 4; 155 156 unsigned LaneVGPRIdx = Offset / (64 * 4); 157 unsigned Lane = (Offset / 4) % 64; 158 159 struct SpilledReg Spill; 160 161 if (!LaneVGPRs.count(LaneVGPRIdx)) { 162 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); 163 164 if (LaneVGPR == AMDGPU::NoRegister) { 165 LLVMContext &Ctx = MF->getFunction()->getContext(); 166 Ctx.emitError("Ran out of VGPRs for spilling SGPR"); 167 168 // When compiling from inside Mesa, the compilation continues. 169 // Select an arbitrary register to avoid triggering assertions 170 // during subsequent passes. 171 LaneVGPR = AMDGPU::VGPR0; 172 } 173 174 LaneVGPRs[LaneVGPRIdx] = LaneVGPR; 175 176 // Add this register as live-in to all blocks to avoid machine verifer 177 // complaining about use of an undefined physical register. 178 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); 179 BI != BE; ++BI) { 180 BI->addLiveIn(LaneVGPR); 181 } 182 } 183 184 Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; 185 Spill.Lane = Lane; 186 return Spill; 187 } 188 189 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize( 190 const MachineFunction &MF) const { 191 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 192 // FIXME: We should get this information from kernel attributes if it 193 // is available. 194 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize(); 195 } 196