1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "AMDGPUCallLowering.h" 17 #include "R600ISelLowering.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineScheduler.h" 20 #include "SIFrameLowering.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/CodeGen/MachineScheduler.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "amdgpu-subtarget" 30 31 #define GET_SUBTARGETINFO_ENUM 32 #define GET_SUBTARGETINFO_TARGET_DESC 33 #define GET_SUBTARGETINFO_CTOR 34 #include "AMDGPUGenSubtargetInfo.inc" 35 36 #ifdef LLVM_BUILD_GLOBAL_ISEL 37 namespace { 38 struct AMDGPUGISelActualAccessor : public GISelAccessor { 39 std::unique_ptr<CallLowering> CallLoweringInfo; 40 const CallLowering *getCallLowering() const override { 41 return CallLoweringInfo.get(); 42 } 43 }; 44 } // End anonymous namespace. 45 #endif 46 47 AMDGPUSubtarget & 48 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 49 StringRef GPU, StringRef FS) { 50 // Determine default and user-specified characteristics 51 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 52 // enabled, but some instructions do not respect them and they run at the 53 // double precision rate, so don't enable by default. 54 // 55 // We want to be able to turn these off, but making this a subtarget feature 56 // for SI has the unhelpful behavior that it unsets everything else if you 57 // disable it. 58 59 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); 60 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 61 FullFS += "+flat-for-global,"; 62 FullFS += FS; 63 64 ParseSubtargetFeatures(GPU, FullFS); 65 66 // FIXME: I don't think think Evergreen has any useful support for 67 // denormals, but should be checked. Should we issue a warning somewhere 68 // if someone tries to enable these? 69 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 70 FP32Denormals = false; 71 FP64Denormals = false; 72 } 73 74 // Set defaults if needed. 75 if (MaxPrivateElementSize == 0) 76 MaxPrivateElementSize = 16; 77 78 return *this; 79 } 80 81 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 82 TargetMachine &TM) 83 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 84 DumpCode(false), R600ALUInst(false), HasVertexCache(false), 85 TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), 86 FP64Denormals(false), FP32Denormals(false), FPExceptions(false), 87 FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false), 88 FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), 89 EnablePromoteAlloca(false), 90 EnableIfCvt(true), EnableLoadStoreOpt(false), 91 EnableUnsafeDSOffsetFolding(false), 92 EnableXNACK(false), 93 WavefrontSize(0), CFALUBug(false), 94 LocalMemorySize(0), MaxPrivateElementSize(0), 95 EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), 96 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), 97 HasSMemRealTime(false), Has16BitInsts(false), 98 LDSBankCount(0), 99 IsaVersion(ISAVersion0_0_0), 100 EnableSIScheduler(false), 101 DebuggerInsertNops(false), 102 FrameLowering(nullptr), 103 GISel(), 104 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { 105 106 initializeSubtargetDependencies(TT, GPU, FS); 107 108 const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) 109 110 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 111 InstrInfo.reset(new R600InstrInfo(*this)); 112 TLInfo.reset(new R600TargetLowering(TM, *this)); 113 114 // FIXME: Should have R600 specific FrameLowering 115 FrameLowering.reset(new AMDGPUFrameLowering( 116 TargetFrameLowering::StackGrowsUp, 117 MaxStackAlign, 118 0)); 119 } else { 120 InstrInfo.reset(new SIInstrInfo(*this)); 121 TLInfo.reset(new SITargetLowering(TM, *this)); 122 FrameLowering.reset(new SIFrameLowering( 123 TargetFrameLowering::StackGrowsUp, 124 MaxStackAlign, 125 0)); 126 #ifndef LLVM_BUILD_GLOBAL_ISEL 127 GISelAccessor *GISel = new GISelAccessor(); 128 #else 129 AMDGPUGISelActualAccessor *GISel = 130 new AMDGPUGISelActualAccessor(); 131 GISel->CallLoweringInfo.reset( 132 new AMDGPUCallLowering(*getTargetLowering())); 133 #endif 134 setGISelAccessor(*GISel); 135 } 136 } 137 138 const CallLowering *AMDGPUSubtarget::getCallLowering() const { 139 assert(GISel && "Access to GlobalISel APIs not set"); 140 return GISel->getCallLowering(); 141 } 142 143 unsigned AMDGPUSubtarget::getStackEntrySize() const { 144 assert(getGeneration() <= NORTHERN_ISLANDS); 145 switch(getWavefrontSize()) { 146 case 16: 147 return 8; 148 case 32: 149 return hasCaymanISA() ? 4 : 8; 150 case 64: 151 return 4; 152 default: 153 llvm_unreachable("Illegal wavefront size."); 154 } 155 } 156 157 unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { 158 switch(getGeneration()) { 159 default: llvm_unreachable("ChipID unknown"); 160 case SEA_ISLANDS: return 12; 161 } 162 } 163 164 AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { 165 return AMDGPU::getIsaVersion(getFeatureBits()); 166 } 167 168 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const { 169 return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling; 170 } 171 172 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 173 MachineInstr *begin, 174 MachineInstr *end, 175 unsigned NumRegionInstrs) const { 176 if (getGeneration() >= SOUTHERN_ISLANDS) { 177 178 // Track register pressure so the scheduler can try to decrease 179 // pressure once register usage is above the threshold defined by 180 // SIRegisterInfo::getRegPressureSetLimit() 181 Policy.ShouldTrackPressure = true; 182 183 // Enabling both top down and bottom up scheduling seems to give us less 184 // register spills than just using one of these approaches on its own. 185 Policy.OnlyTopDown = false; 186 Policy.OnlyBottomUp = false; 187 188 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 189 if (!enableSIScheduler()) 190 Policy.ShouldTrackLaneMasks = true; 191 } 192 } 193 194