1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "AMDGPUCallLowering.h" 17 #include "R600ISelLowering.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineScheduler.h" 20 #include "SIFrameLowering.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/CodeGen/MachineScheduler.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "amdgpu-subtarget" 30 31 #define GET_SUBTARGETINFO_ENUM 32 #define GET_SUBTARGETINFO_TARGET_DESC 33 #define GET_SUBTARGETINFO_CTOR 34 #include "AMDGPUGenSubtargetInfo.inc" 35 36 #ifdef LLVM_BUILD_GLOBAL_ISEL 37 namespace { 38 struct AMDGPUGISelActualAccessor : public GISelAccessor { 39 std::unique_ptr<CallLowering> CallLoweringInfo; 40 const CallLowering *getCallLowering() const override { 41 return CallLoweringInfo.get(); 42 } 43 }; 44 } // End anonymous namespace. 45 #endif 46 47 AMDGPUSubtarget & 48 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 49 StringRef GPU, StringRef FS) { 50 // Determine default and user-specified characteristics 51 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 52 // enabled, but some instructions do not respect them and they run at the 53 // double precision rate, so don't enable by default. 54 // 55 // We want to be able to turn these off, but making this a subtarget feature 56 // for SI has the unhelpful behavior that it unsets everything else if you 57 // disable it. 58 59 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); 60 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 61 FullFS += "+flat-for-global,"; 62 FullFS += FS; 63 64 ParseSubtargetFeatures(GPU, FullFS); 65 66 // FIXME: I don't think think Evergreen has any useful support for 67 // denormals, but should be checked. Should we issue a warning somewhere 68 // if someone tries to enable these? 69 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 70 FP32Denormals = false; 71 FP64Denormals = false; 72 } 73 74 // Set defaults if needed. 75 if (MaxPrivateElementSize == 0) 76 MaxPrivateElementSize = 4; 77 78 return *this; 79 } 80 81 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 82 TargetMachine &TM) 83 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 84 DumpCode(false), R600ALUInst(false), HasVertexCache(false), 85 TexVTXClauseSize(0), 86 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), 87 FP64(false), 88 FP64Denormals(false), FP32Denormals(false), FPExceptions(false), 89 FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false), 90 FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), 91 EnablePromoteAlloca(false), 92 EnableIfCvt(true), EnableLoadStoreOpt(false), 93 EnableUnsafeDSOffsetFolding(false), 94 EnableXNACK(false), 95 WavefrontSize(64), CFALUBug(false), 96 LocalMemorySize(0), MaxPrivateElementSize(0), 97 EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), 98 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), 99 HasSMemRealTime(false), Has16BitInsts(false), 100 LDSBankCount(0), 101 IsaVersion(ISAVersion0_0_0), 102 EnableSIScheduler(false), 103 DebuggerInsertNops(false), DebuggerReserveRegs(false), 104 FrameLowering(nullptr), 105 GISel(), 106 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { 107 108 initializeSubtargetDependencies(TT, GPU, FS); 109 110 const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) 111 112 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 113 InstrInfo.reset(new R600InstrInfo(*this)); 114 TLInfo.reset(new R600TargetLowering(TM, *this)); 115 116 // FIXME: Should have R600 specific FrameLowering 117 FrameLowering.reset(new AMDGPUFrameLowering( 118 TargetFrameLowering::StackGrowsUp, 119 MaxStackAlign, 120 0)); 121 } else { 122 InstrInfo.reset(new SIInstrInfo(*this)); 123 TLInfo.reset(new SITargetLowering(TM, *this)); 124 FrameLowering.reset(new SIFrameLowering( 125 TargetFrameLowering::StackGrowsUp, 126 MaxStackAlign, 127 0)); 128 #ifndef LLVM_BUILD_GLOBAL_ISEL 129 GISelAccessor *GISel = new GISelAccessor(); 130 #else 131 AMDGPUGISelActualAccessor *GISel = 132 new AMDGPUGISelActualAccessor(); 133 GISel->CallLoweringInfo.reset( 134 new AMDGPUCallLowering(*getTargetLowering())); 135 #endif 136 setGISelAccessor(*GISel); 137 } 138 } 139 140 const CallLowering *AMDGPUSubtarget::getCallLowering() const { 141 assert(GISel && "Access to GlobalISel APIs not set"); 142 return GISel->getCallLowering(); 143 } 144 145 unsigned AMDGPUSubtarget::getStackEntrySize() const { 146 assert(getGeneration() <= NORTHERN_ISLANDS); 147 switch(getWavefrontSize()) { 148 case 16: 149 return 8; 150 case 32: 151 return hasCaymanISA() ? 4 : 8; 152 case 64: 153 return 4; 154 default: 155 llvm_unreachable("Illegal wavefront size."); 156 } 157 } 158 159 // FIXME: These limits are for SI. Did they change with the larger maximum LDS 160 // size? 161 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { 162 switch (NWaves) { 163 case 10: 164 return 1638; 165 case 9: 166 return 1820; 167 case 8: 168 return 2048; 169 case 7: 170 return 2340; 171 case 6: 172 return 2730; 173 case 5: 174 return 3276; 175 case 4: 176 return 4096; 177 case 3: 178 return 5461; 179 case 2: 180 return 8192; 181 default: 182 return getLocalMemorySize(); 183 } 184 } 185 186 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { 187 if (Bytes <= 1638) 188 return 10; 189 190 if (Bytes <= 1820) 191 return 9; 192 193 if (Bytes <= 2048) 194 return 8; 195 196 if (Bytes <= 2340) 197 return 7; 198 199 if (Bytes <= 2730) 200 return 6; 201 202 if (Bytes <= 3276) 203 return 5; 204 205 if (Bytes <= 4096) 206 return 4; 207 208 if (Bytes <= 5461) 209 return 3; 210 211 if (Bytes <= 8192) 212 return 2; 213 214 return 1; 215 } 216 217 unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { 218 switch(getGeneration()) { 219 default: llvm_unreachable("ChipID unknown"); 220 case SEA_ISLANDS: return 12; 221 } 222 } 223 224 AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { 225 return AMDGPU::getIsaVersion(getFeatureBits()); 226 } 227 228 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const { 229 return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling; 230 } 231 232 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 233 MachineInstr *begin, 234 MachineInstr *end, 235 unsigned NumRegionInstrs) const { 236 if (getGeneration() >= SOUTHERN_ISLANDS) { 237 238 // Track register pressure so the scheduler can try to decrease 239 // pressure once register usage is above the threshold defined by 240 // SIRegisterInfo::getRegPressureSetLimit() 241 Policy.ShouldTrackPressure = true; 242 243 // Enabling both top down and bottom up scheduling seems to give us less 244 // register spills than just using one of these approaches on its own. 245 Policy.OnlyTopDown = false; 246 Policy.OnlyBottomUp = false; 247 248 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 249 if (!enableSIScheduler()) 250 Policy.ShouldTrackLaneMasks = true; 251 } 252 } 253 254