1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "AMDGPUCallLowering.h" 17 #include "R600ISelLowering.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineScheduler.h" 20 #include "SIFrameLowering.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/CodeGen/MachineScheduler.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "amdgpu-subtarget" 30 31 #define GET_SUBTARGETINFO_ENUM 32 #define GET_SUBTARGETINFO_TARGET_DESC 33 #define GET_SUBTARGETINFO_CTOR 34 #include "AMDGPUGenSubtargetInfo.inc" 35 36 #ifdef LLVM_BUILD_GLOBAL_ISEL 37 namespace { 38 struct AMDGPUGISelActualAccessor : public GISelAccessor { 39 std::unique_ptr<CallLowering> CallLoweringInfo; 40 const CallLowering *getCallLowering() const override { 41 return CallLoweringInfo.get(); 42 } 43 }; 44 } // End anonymous namespace. 45 #endif 46 47 AMDGPUSubtarget & 48 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 49 StringRef GPU, StringRef FS) { 50 // Determine default and user-specified characteristics 51 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 52 // enabled, but some instructions do not respect them and they run at the 53 // double precision rate, so don't enable by default. 54 // 55 // We want to be able to turn these off, but making this a subtarget feature 56 // for SI has the unhelpful behavior that it unsets everything else if you 57 // disable it. 58 59 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); 60 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 61 FullFS += "+flat-for-global,"; 62 FullFS += FS; 63 64 ParseSubtargetFeatures(GPU, FullFS); 65 66 // FIXME: I don't think think Evergreen has any useful support for 67 // denormals, but should be checked. Should we issue a warning somewhere 68 // if someone tries to enable these? 69 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 70 FP32Denormals = false; 71 FP64Denormals = false; 72 } 73 74 // Set defaults if needed. 75 if (MaxPrivateElementSize == 0) 76 MaxPrivateElementSize = 16; 77 78 return *this; 79 } 80 81 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 82 TargetMachine &TM) 83 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 84 DumpCode(false), R600ALUInst(false), HasVertexCache(false), 85 TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), 86 FP64Denormals(false), FP32Denormals(false), FPExceptions(false), 87 FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false), 88 FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), 89 EnablePromoteAlloca(false), 90 EnableIfCvt(true), EnableLoadStoreOpt(false), 91 EnableUnsafeDSOffsetFolding(false), 92 EnableXNACK(false), 93 WavefrontSize(0), CFALUBug(false), 94 LocalMemorySize(0), MaxPrivateElementSize(0), 95 EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), 96 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), 97 HasSMemRealTime(false), Has16BitInsts(false), 98 LDSBankCount(0), 99 IsaVersion(ISAVersion0_0_0), 100 EnableSIScheduler(false), FrameLowering(nullptr), 101 GISel(), 102 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { 103 104 initializeSubtargetDependencies(TT, GPU, FS); 105 106 const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) 107 108 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 109 InstrInfo.reset(new R600InstrInfo(*this)); 110 TLInfo.reset(new R600TargetLowering(TM, *this)); 111 112 // FIXME: Should have R600 specific FrameLowering 113 FrameLowering.reset(new AMDGPUFrameLowering( 114 TargetFrameLowering::StackGrowsUp, 115 MaxStackAlign, 116 0)); 117 } else { 118 InstrInfo.reset(new SIInstrInfo(*this)); 119 TLInfo.reset(new SITargetLowering(TM, *this)); 120 FrameLowering.reset(new SIFrameLowering( 121 TargetFrameLowering::StackGrowsUp, 122 MaxStackAlign, 123 0)); 124 #ifndef LLVM_BUILD_GLOBAL_ISEL 125 GISelAccessor *GISel = new GISelAccessor(); 126 #else 127 AMDGPUGISelActualAccessor *GISel = 128 new AMDGPUGISelActualAccessor(); 129 GISel->CallLoweringInfo.reset( 130 new AMDGPUCallLowering(*getTargetLowering())); 131 #endif 132 setGISelAccessor(*GISel); 133 } 134 } 135 136 const CallLowering *AMDGPUSubtarget::getCallLowering() const { 137 assert(GISel && "Access to GlobalISel APIs not set"); 138 return GISel->getCallLowering(); 139 } 140 141 unsigned AMDGPUSubtarget::getStackEntrySize() const { 142 assert(getGeneration() <= NORTHERN_ISLANDS); 143 switch(getWavefrontSize()) { 144 case 16: 145 return 8; 146 case 32: 147 return hasCaymanISA() ? 4 : 8; 148 case 64: 149 return 4; 150 default: 151 llvm_unreachable("Illegal wavefront size."); 152 } 153 } 154 155 unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { 156 switch(getGeneration()) { 157 default: llvm_unreachable("ChipID unknown"); 158 case SEA_ISLANDS: return 12; 159 } 160 } 161 162 AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { 163 return AMDGPU::getIsaVersion(getFeatureBits()); 164 } 165 166 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const { 167 return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling; 168 } 169 170 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 171 MachineInstr *begin, 172 MachineInstr *end, 173 unsigned NumRegionInstrs) const { 174 if (getGeneration() >= SOUTHERN_ISLANDS) { 175 176 // Track register pressure so the scheduler can try to decrease 177 // pressure once register usage is above the threshold defined by 178 // SIRegisterInfo::getRegPressureSetLimit() 179 Policy.ShouldTrackPressure = true; 180 181 // Enabling both top down and bottom up scheduling seems to give us less 182 // register spills than just using one of these approaches on its own. 183 Policy.OnlyTopDown = false; 184 Policy.OnlyBottomUp = false; 185 186 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 187 if (!enableSIScheduler()) 188 Policy.ShouldTrackLaneMasks = true; 189 } 190 } 191 192