1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "R600ISelLowering.h" 17 #include "R600InstrInfo.h" 18 #include "SIFrameLowering.h" 19 #include "SIISelLowering.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/CodeGen/MachineScheduler.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "amdgpu-subtarget" 28 29 #define GET_SUBTARGETINFO_ENUM 30 #define GET_SUBTARGETINFO_TARGET_DESC 31 #define GET_SUBTARGETINFO_CTOR 32 #include "AMDGPUGenSubtargetInfo.inc" 33 34 AMDGPUSubtarget::~AMDGPUSubtarget() {} 35 36 AMDGPUSubtarget & 37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 38 StringRef GPU, StringRef FS) { 39 // Determine default and user-specified characteristics 40 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 41 // enabled, but some instructions do not respect them and they run at the 42 // double precision rate, so don't enable by default. 43 // 44 // We want to be able to turn these off, but making this a subtarget feature 45 // for SI has the unhelpful behavior that it unsets everything else if you 46 // disable it. 47 48 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); 49 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 50 FullFS += "+flat-for-global,+unaligned-buffer-access,"; 51 FullFS += FS; 52 53 ParseSubtargetFeatures(GPU, FullFS); 54 55 // FIXME: I don't think think Evergreen has any useful support for 56 // denormals, but should be checked. Should we issue a warning somewhere 57 // if someone tries to enable these? 58 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 59 FP32Denormals = false; 60 FP64Denormals = false; 61 } 62 63 // Set defaults if needed. 64 if (MaxPrivateElementSize == 0) 65 MaxPrivateElementSize = 4; 66 67 return *this; 68 } 69 70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 71 const TargetMachine &TM) 72 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 73 TargetTriple(TT), 74 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), 75 IsaVersion(ISAVersion0_0_0), 76 WavefrontSize(64), 77 LocalMemorySize(0), 78 LDSBankCount(0), 79 MaxPrivateElementSize(0), 80 81 FastFMAF32(false), 82 HalfRate64Ops(false), 83 84 FP32Denormals(false), 85 FP64Denormals(false), 86 FPExceptions(false), 87 FlatForGlobal(false), 88 UnalignedBufferAccess(false), 89 90 EnableXNACK(false), 91 DebuggerInsertNops(false), 92 DebuggerReserveRegs(false), 93 DebuggerEmitPrologue(false), 94 95 EnableVGPRSpilling(false), 96 EnablePromoteAlloca(false), 97 EnableLoadStoreOpt(false), 98 EnableUnsafeDSOffsetFolding(false), 99 EnableSIScheduler(false), 100 DumpCode(false), 101 102 FP64(false), 103 IsGCN(false), 104 GCN1Encoding(false), 105 GCN3Encoding(false), 106 CIInsts(false), 107 SGPRInitBug(false), 108 HasSMemRealTime(false), 109 Has16BitInsts(false), 110 FlatAddressSpace(false), 111 112 R600ALUInst(false), 113 CaymanISA(false), 114 CFALUBug(false), 115 HasVertexCache(false), 116 TexVTXClauseSize(0), 117 118 FeatureDisable(false), 119 InstrItins(getInstrItineraryForCPU(GPU)) { 120 initializeSubtargetDependencies(TT, GPU, FS); 121 } 122 123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS 124 // size? 125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { 126 switch (NWaves) { 127 case 10: 128 return 1638; 129 case 9: 130 return 1820; 131 case 8: 132 return 2048; 133 case 7: 134 return 2340; 135 case 6: 136 return 2730; 137 case 5: 138 return 3276; 139 case 4: 140 return 4096; 141 case 3: 142 return 5461; 143 case 2: 144 return 8192; 145 default: 146 return getLocalMemorySize(); 147 } 148 } 149 150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { 151 if (Bytes <= 1638) 152 return 10; 153 154 if (Bytes <= 1820) 155 return 9; 156 157 if (Bytes <= 2048) 158 return 8; 159 160 if (Bytes <= 2340) 161 return 7; 162 163 if (Bytes <= 2730) 164 return 6; 165 166 if (Bytes <= 3276) 167 return 5; 168 169 if (Bytes <= 4096) 170 return 4; 171 172 if (Bytes <= 5461) 173 return 3; 174 175 if (Bytes <= 8192) 176 return 2; 177 178 return 1; 179 } 180 181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, 182 const TargetMachine &TM) : 183 AMDGPUSubtarget(TT, GPU, FS, TM), 184 InstrInfo(*this), 185 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 186 TLInfo(TM, *this) {} 187 188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, 189 const TargetMachine &TM) : 190 AMDGPUSubtarget(TT, GPU, FS, TM), 191 InstrInfo(*this), 192 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 193 TLInfo(TM, *this), 194 GISel() {} 195 196 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 197 unsigned NumRegionInstrs) const { 198 // Track register pressure so the scheduler can try to decrease 199 // pressure once register usage is above the threshold defined by 200 // SIRegisterInfo::getRegPressureSetLimit() 201 Policy.ShouldTrackPressure = true; 202 203 // Enabling both top down and bottom up scheduling seems to give us less 204 // register spills than just using one of these approaches on its own. 205 Policy.OnlyTopDown = false; 206 Policy.OnlyBottomUp = false; 207 208 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 209 if (!enableSIScheduler()) 210 Policy.ShouldTrackLaneMasks = true; 211 } 212 213 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { 214 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); 215 } 216