1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "AMDGPUCallLowering.h" 17 #include "R600ISelLowering.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineScheduler.h" 20 #include "SIFrameLowering.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/CodeGen/MachineScheduler.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "amdgpu-subtarget" 30 31 #define GET_SUBTARGETINFO_ENUM 32 #define GET_SUBTARGETINFO_TARGET_DESC 33 #define GET_SUBTARGETINFO_CTOR 34 #include "AMDGPUGenSubtargetInfo.inc" 35 36 #ifdef LLVM_BUILD_GLOBAL_ISEL 37 namespace { 38 struct AMDGPUGISelActualAccessor : public GISelAccessor { 39 std::unique_ptr<CallLowering> CallLoweringInfo; 40 const CallLowering *getCallLowering() const override { 41 return CallLoweringInfo.get(); 42 } 43 }; 44 } // End anonymous namespace. 45 #endif 46 47 AMDGPUSubtarget & 48 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 49 StringRef GPU, StringRef FS) { 50 // Determine default and user-specified characteristics 51 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 52 // enabled, but some instructions do not respect them and they run at the 53 // double precision rate, so don't enable by default. 54 // 55 // We want to be able to turn these off, but making this a subtarget feature 56 // for SI has the unhelpful behavior that it unsets everything else if you 57 // disable it. 58 59 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); 60 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 61 FullFS += "+flat-for-global,"; 62 FullFS += FS; 63 64 ParseSubtargetFeatures(GPU, FullFS); 65 66 // FIXME: I don't think think Evergreen has any useful support for 67 // denormals, but should be checked. Should we issue a warning somewhere 68 // if someone tries to enable these? 69 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 70 FP32Denormals = false; 71 FP64Denormals = false; 72 } 73 74 // Set defaults if needed. 75 if (MaxPrivateElementSize == 0) 76 MaxPrivateElementSize = 4; 77 78 return *this; 79 } 80 81 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 82 TargetMachine &TM) 83 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 84 DumpCode(false), R600ALUInst(false), HasVertexCache(false), 85 TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), 86 FP64Denormals(false), FP32Denormals(false), FPExceptions(false), 87 FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false), 88 FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), 89 EnablePromoteAlloca(false), 90 EnableIfCvt(true), EnableLoadStoreOpt(false), 91 EnableUnsafeDSOffsetFolding(false), 92 EnableXNACK(false), 93 WavefrontSize(0), CFALUBug(false), 94 LocalMemorySize(0), MaxPrivateElementSize(0), 95 EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), 96 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), 97 HasSMemRealTime(false), Has16BitInsts(false), 98 LDSBankCount(0), 99 IsaVersion(ISAVersion0_0_0), 100 EnableSIScheduler(false), 101 DebuggerInsertNops(false), DebuggerReserveRegs(false), 102 FrameLowering(nullptr), 103 GISel(), 104 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { 105 106 initializeSubtargetDependencies(TT, GPU, FS); 107 108 const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) 109 110 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 111 InstrInfo.reset(new R600InstrInfo(*this)); 112 TLInfo.reset(new R600TargetLowering(TM, *this)); 113 114 // FIXME: Should have R600 specific FrameLowering 115 FrameLowering.reset(new AMDGPUFrameLowering( 116 TargetFrameLowering::StackGrowsUp, 117 MaxStackAlign, 118 0)); 119 } else { 120 InstrInfo.reset(new SIInstrInfo(*this)); 121 TLInfo.reset(new SITargetLowering(TM, *this)); 122 FrameLowering.reset(new SIFrameLowering( 123 TargetFrameLowering::StackGrowsUp, 124 MaxStackAlign, 125 0)); 126 #ifndef LLVM_BUILD_GLOBAL_ISEL 127 GISelAccessor *GISel = new GISelAccessor(); 128 #else 129 AMDGPUGISelActualAccessor *GISel = 130 new AMDGPUGISelActualAccessor(); 131 GISel->CallLoweringInfo.reset( 132 new AMDGPUCallLowering(*getTargetLowering())); 133 #endif 134 setGISelAccessor(*GISel); 135 } 136 } 137 138 const CallLowering *AMDGPUSubtarget::getCallLowering() const { 139 assert(GISel && "Access to GlobalISel APIs not set"); 140 return GISel->getCallLowering(); 141 } 142 143 unsigned AMDGPUSubtarget::getStackEntrySize() const { 144 assert(getGeneration() <= NORTHERN_ISLANDS); 145 switch(getWavefrontSize()) { 146 case 16: 147 return 8; 148 case 32: 149 return hasCaymanISA() ? 4 : 8; 150 case 64: 151 return 4; 152 default: 153 llvm_unreachable("Illegal wavefront size."); 154 } 155 } 156 157 // FIXME: These limits are for SI. Did they change with the larger maximum LDS 158 // size? 159 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { 160 switch (NWaves) { 161 case 10: 162 return 1638; 163 case 9: 164 return 1820; 165 case 8: 166 return 2048; 167 case 7: 168 return 2340; 169 case 6: 170 return 2730; 171 case 5: 172 return 3276; 173 case 4: 174 return 4096; 175 case 3: 176 return 5461; 177 case 2: 178 return 8192; 179 default: 180 return getLocalMemorySize(); 181 } 182 } 183 184 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { 185 if (Bytes <= 1638) 186 return 10; 187 188 if (Bytes <= 1820) 189 return 9; 190 191 if (Bytes <= 2048) 192 return 8; 193 194 if (Bytes <= 2340) 195 return 7; 196 197 if (Bytes <= 2730) 198 return 6; 199 200 if (Bytes <= 3276) 201 return 5; 202 203 if (Bytes <= 4096) 204 return 4; 205 206 if (Bytes <= 5461) 207 return 3; 208 209 if (Bytes <= 8192) 210 return 2; 211 212 return 1; 213 } 214 215 unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { 216 switch(getGeneration()) { 217 default: llvm_unreachable("ChipID unknown"); 218 case SEA_ISLANDS: return 12; 219 } 220 } 221 222 AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { 223 return AMDGPU::getIsaVersion(getFeatureBits()); 224 } 225 226 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const { 227 return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling; 228 } 229 230 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 231 MachineInstr *begin, 232 MachineInstr *end, 233 unsigned NumRegionInstrs) const { 234 if (getGeneration() >= SOUTHERN_ISLANDS) { 235 236 // Track register pressure so the scheduler can try to decrease 237 // pressure once register usage is above the threshold defined by 238 // SIRegisterInfo::getRegPressureSetLimit() 239 Policy.ShouldTrackPressure = true; 240 241 // Enabling both top down and bottom up scheduling seems to give us less 242 // register spills than just using one of these approaches on its own. 243 Policy.OnlyTopDown = false; 244 Policy.OnlyBottomUp = false; 245 246 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 247 if (!enableSIScheduler()) 248 Policy.ShouldTrackLaneMasks = true; 249 } 250 } 251 252