1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUSubtarget.h"
16 #include "R600ISelLowering.h"
17 #include "R600InstrInfo.h"
18 #include "SIFrameLowering.h"
19 #include "SIISelLowering.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/CodeGen/MachineScheduler.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-subtarget"
28 
29 #define GET_SUBTARGETINFO_ENUM
30 #define GET_SUBTARGETINFO_TARGET_DESC
31 #define GET_SUBTARGETINFO_CTOR
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
35 
36 AMDGPUSubtarget &
37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38                                                  StringRef GPU, StringRef FS) {
39   // Determine default and user-specified characteristics
40   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41   // enabled, but some instructions do not respect them and they run at the
42   // double precision rate, so don't enable by default.
43   //
44   // We want to be able to turn these off, but making this a subtarget feature
45   // for SI has the unhelpful behavior that it unsets everything else if you
46   // disable it.
47 
48   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
49   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
50     FullFS += "+flat-for-global,";
51   FullFS += FS;
52 
53   ParseSubtargetFeatures(GPU, FullFS);
54 
55   // FIXME: I don't think think Evergreen has any useful support for
56   // denormals, but should be checked. Should we issue a warning somewhere
57   // if someone tries to enable these?
58   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
59     FP32Denormals = false;
60     FP64Denormals = false;
61   }
62 
63   // Set defaults if needed.
64   if (MaxPrivateElementSize == 0)
65     MaxPrivateElementSize = 4;
66 
67   return *this;
68 }
69 
70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
71                                  const TargetMachine &TM)
72   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73     TargetTriple(TT),
74     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75     IsaVersion(ISAVersion0_0_0),
76     WavefrontSize(64),
77     LocalMemorySize(0),
78     LDSBankCount(0),
79     MaxPrivateElementSize(0),
80 
81     FastFMAF32(false),
82     HalfRate64Ops(false),
83 
84     FP32Denormals(false),
85     FP64Denormals(false),
86     FPExceptions(false),
87     FlatForGlobal(false),
88     EnableXNACK(false),
89     DebuggerInsertNops(false),
90     DebuggerReserveRegs(false),
91     DebuggerEmitPrologue(false),
92 
93     EnableVGPRSpilling(false),
94     EnablePromoteAlloca(false),
95     EnableLoadStoreOpt(false),
96     EnableUnsafeDSOffsetFolding(false),
97     EnableSIScheduler(false),
98     DumpCode(false),
99 
100     FP64(false),
101     IsGCN(false),
102     GCN1Encoding(false),
103     GCN3Encoding(false),
104     CIInsts(false),
105     SGPRInitBug(false),
106     HasSMemRealTime(false),
107     Has16BitInsts(false),
108     FlatAddressSpace(false),
109 
110     R600ALUInst(false),
111     CaymanISA(false),
112     CFALUBug(false),
113     HasVertexCache(false),
114     TexVTXClauseSize(0),
115 
116     FeatureDisable(false),
117 
118     InstrItins(getInstrItineraryForCPU(GPU)) {
119   initializeSubtargetDependencies(TT, GPU, FS);
120 }
121 
122 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
123 // size?
124 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
125   switch (NWaves) {
126   case 10:
127     return 1638;
128   case 9:
129     return 1820;
130   case 8:
131     return 2048;
132   case 7:
133     return 2340;
134   case 6:
135     return 2730;
136   case 5:
137     return 3276;
138   case 4:
139     return 4096;
140   case 3:
141     return 5461;
142   case 2:
143     return 8192;
144   default:
145     return getLocalMemorySize();
146   }
147 }
148 
149 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
150   if (Bytes <= 1638)
151     return 10;
152 
153   if (Bytes <= 1820)
154     return 9;
155 
156   if (Bytes <= 2048)
157     return 8;
158 
159   if (Bytes <= 2340)
160     return 7;
161 
162   if (Bytes <= 2730)
163     return 6;
164 
165   if (Bytes <= 3276)
166     return 5;
167 
168   if (Bytes <= 4096)
169     return 4;
170 
171   if (Bytes <= 5461)
172     return 3;
173 
174   if (Bytes <= 8192)
175     return 2;
176 
177   return 1;
178 }
179 
180 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
181                              const TargetMachine &TM) :
182   AMDGPUSubtarget(TT, GPU, FS, TM),
183   InstrInfo(*this),
184   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
185   TLInfo(TM, *this) {}
186 
187 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
188                          const TargetMachine &TM) :
189   AMDGPUSubtarget(TT, GPU, FS, TM),
190   InstrInfo(*this),
191   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
192   TLInfo(TM, *this),
193   GISel() {}
194 
195 unsigned R600Subtarget::getStackEntrySize() const {
196   switch (getWavefrontSize()) {
197   case 16:
198     return 8;
199   case 32:
200     return hasCaymanISA() ? 4 : 8;
201   case 64:
202     return 4;
203   default:
204     llvm_unreachable("Illegal wavefront size.");
205   }
206 }
207 
208 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
209                                       MachineInstr *begin,
210                                       MachineInstr *end,
211                                       unsigned NumRegionInstrs) const {
212   // Track register pressure so the scheduler can try to decrease
213   // pressure once register usage is above the threshold defined by
214   // SIRegisterInfo::getRegPressureSetLimit()
215   Policy.ShouldTrackPressure = true;
216 
217   // Enabling both top down and bottom up scheduling seems to give us less
218   // register spills than just using one of these approaches on its own.
219   Policy.OnlyTopDown = false;
220   Policy.OnlyBottomUp = false;
221 
222   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
223   if (!enableSIScheduler())
224     Policy.ShouldTrackLaneMasks = true;
225 }
226 
227 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
228   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
229 }
230 
231 unsigned SISubtarget::getAmdKernelCodeChipID() const {
232   switch (getGeneration()) {
233   case SEA_ISLANDS:
234     return 12;
235   default:
236     llvm_unreachable("ChipID unknown");
237   }
238 }
239 
240 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
241   return AMDGPU::getIsaVersion(getFeatureBits());
242 }
243