1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "R600ISelLowering.h" 17 #include "R600InstrInfo.h" 18 #include "SIFrameLowering.h" 19 #include "SIISelLowering.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/CodeGen/MachineScheduler.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "amdgpu-subtarget" 28 29 #define GET_SUBTARGETINFO_ENUM 30 #define GET_SUBTARGETINFO_TARGET_DESC 31 #define GET_SUBTARGETINFO_CTOR 32 #include "AMDGPUGenSubtargetInfo.inc" 33 34 AMDGPUSubtarget::~AMDGPUSubtarget() {} 35 36 AMDGPUSubtarget & 37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 38 StringRef GPU, StringRef FS) { 39 // Determine default and user-specified characteristics 40 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 41 // enabled, but some instructions do not respect them and they run at the 42 // double precision rate, so don't enable by default. 43 // 44 // We want to be able to turn these off, but making this a subtarget feature 45 // for SI has the unhelpful behavior that it unsets everything else if you 46 // disable it. 47 48 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); 49 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 50 FullFS += "+flat-for-global,"; 51 FullFS += FS; 52 53 ParseSubtargetFeatures(GPU, FullFS); 54 55 // FIXME: I don't think think Evergreen has any useful support for 56 // denormals, but should be checked. Should we issue a warning somewhere 57 // if someone tries to enable these? 58 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 59 FP32Denormals = false; 60 FP64Denormals = false; 61 } 62 63 // Set defaults if needed. 64 if (MaxPrivateElementSize == 0) 65 MaxPrivateElementSize = 4; 66 67 return *this; 68 } 69 70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 71 const TargetMachine &TM) 72 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 73 TargetTriple(TT), 74 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), 75 IsaVersion(ISAVersion0_0_0), 76 WavefrontSize(64), 77 LocalMemorySize(0), 78 LDSBankCount(0), 79 MaxPrivateElementSize(0), 80 81 FastFMAF32(false), 82 HalfRate64Ops(false), 83 84 FP32Denormals(false), 85 FP64Denormals(false), 86 FPExceptions(false), 87 FlatForGlobal(false), 88 EnableXNACK(false), 89 DebuggerInsertNops(false), 90 DebuggerReserveRegs(false), 91 DebuggerEmitPrologue(false), 92 93 EnableVGPRSpilling(false), 94 EnablePromoteAlloca(false), 95 EnableLoadStoreOpt(false), 96 EnableUnsafeDSOffsetFolding(false), 97 EnableSIScheduler(false), 98 DumpCode(false), 99 100 FP64(false), 101 IsGCN(false), 102 GCN1Encoding(false), 103 GCN3Encoding(false), 104 CIInsts(false), 105 SGPRInitBug(false), 106 HasSMemRealTime(false), 107 Has16BitInsts(false), 108 FlatAddressSpace(false), 109 110 R600ALUInst(false), 111 CaymanISA(false), 112 CFALUBug(false), 113 HasVertexCache(false), 114 TexVTXClauseSize(0), 115 116 FeatureDisable(false), 117 118 InstrItins(getInstrItineraryForCPU(GPU)) { 119 initializeSubtargetDependencies(TT, GPU, FS); 120 } 121 122 // FIXME: These limits are for SI. Did they change with the larger maximum LDS 123 // size? 124 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { 125 switch (NWaves) { 126 case 10: 127 return 1638; 128 case 9: 129 return 1820; 130 case 8: 131 return 2048; 132 case 7: 133 return 2340; 134 case 6: 135 return 2730; 136 case 5: 137 return 3276; 138 case 4: 139 return 4096; 140 case 3: 141 return 5461; 142 case 2: 143 return 8192; 144 default: 145 return getLocalMemorySize(); 146 } 147 } 148 149 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { 150 if (Bytes <= 1638) 151 return 10; 152 153 if (Bytes <= 1820) 154 return 9; 155 156 if (Bytes <= 2048) 157 return 8; 158 159 if (Bytes <= 2340) 160 return 7; 161 162 if (Bytes <= 2730) 163 return 6; 164 165 if (Bytes <= 3276) 166 return 5; 167 168 if (Bytes <= 4096) 169 return 4; 170 171 if (Bytes <= 5461) 172 return 3; 173 174 if (Bytes <= 8192) 175 return 2; 176 177 return 1; 178 } 179 180 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, 181 const TargetMachine &TM) : 182 AMDGPUSubtarget(TT, GPU, FS, TM), 183 InstrInfo(*this), 184 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 185 TLInfo(TM, *this) {} 186 187 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, 188 const TargetMachine &TM) : 189 AMDGPUSubtarget(TT, GPU, FS, TM), 190 InstrInfo(*this), 191 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 192 TLInfo(TM, *this), 193 GISel() {} 194 195 unsigned R600Subtarget::getStackEntrySize() const { 196 switch (getWavefrontSize()) { 197 case 16: 198 return 8; 199 case 32: 200 return hasCaymanISA() ? 4 : 8; 201 case 64: 202 return 4; 203 default: 204 llvm_unreachable("Illegal wavefront size."); 205 } 206 } 207 208 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 209 MachineInstr *begin, 210 MachineInstr *end, 211 unsigned NumRegionInstrs) const { 212 // Track register pressure so the scheduler can try to decrease 213 // pressure once register usage is above the threshold defined by 214 // SIRegisterInfo::getRegPressureSetLimit() 215 Policy.ShouldTrackPressure = true; 216 217 // Enabling both top down and bottom up scheduling seems to give us less 218 // register spills than just using one of these approaches on its own. 219 Policy.OnlyTopDown = false; 220 Policy.OnlyBottomUp = false; 221 222 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 223 if (!enableSIScheduler()) 224 Policy.ShouldTrackLaneMasks = true; 225 } 226 227 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { 228 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); 229 } 230 231 unsigned SISubtarget::getAmdKernelCodeChipID() const { 232 switch (getGeneration()) { 233 case SEA_ISLANDS: 234 return 12; 235 default: 236 llvm_unreachable("ChipID unknown"); 237 } 238 } 239 240 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const { 241 return AMDGPU::getIsaVersion(getFeatureBits()); 242 } 243