1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUSubtarget.h"
16 #include "R600ISelLowering.h"
17 #include "R600InstrInfo.h"
18 #include "SIFrameLowering.h"
19 #include "SIISelLowering.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/CodeGen/MachineScheduler.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-subtarget"
28 
29 #define GET_SUBTARGETINFO_ENUM
30 #define GET_SUBTARGETINFO_TARGET_DESC
31 #define GET_SUBTARGETINFO_CTOR
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
35 
36 AMDGPUSubtarget &
37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38                                                  StringRef GPU, StringRef FS) {
39   // Determine default and user-specified characteristics
40   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41   // enabled, but some instructions do not respect them and they run at the
42   // double precision rate, so don't enable by default.
43   //
44   // We want to be able to turn these off, but making this a subtarget feature
45   // for SI has the unhelpful behavior that it unsets everything else if you
46   // disable it.
47 
48   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
49   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
50     FullFS += "+flat-for-global,+unaligned-buffer-access,";
51   FullFS += FS;
52 
53   ParseSubtargetFeatures(GPU, FullFS);
54 
55   // FIXME: I don't think think Evergreen has any useful support for
56   // denormals, but should be checked. Should we issue a warning somewhere
57   // if someone tries to enable these?
58   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
59     FP32Denormals = false;
60     FP64Denormals = false;
61   }
62 
63   // Set defaults if needed.
64   if (MaxPrivateElementSize == 0)
65     MaxPrivateElementSize = 4;
66 
67   return *this;
68 }
69 
70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
71                                  const TargetMachine &TM)
72   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73     TargetTriple(TT),
74     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75     IsaVersion(ISAVersion0_0_0),
76     WavefrontSize(64),
77     LocalMemorySize(0),
78     LDSBankCount(0),
79     MaxPrivateElementSize(0),
80 
81     FastFMAF32(false),
82     HalfRate64Ops(false),
83 
84     FP32Denormals(false),
85     FP64Denormals(false),
86     FPExceptions(false),
87     FlatForGlobal(false),
88     UnalignedBufferAccess(false),
89 
90     EnableXNACK(false),
91     DebuggerInsertNops(false),
92     DebuggerReserveRegs(false),
93     DebuggerEmitPrologue(false),
94 
95     EnableVGPRSpilling(false),
96     EnablePromoteAlloca(false),
97     EnableLoadStoreOpt(false),
98     EnableUnsafeDSOffsetFolding(false),
99     EnableSIScheduler(false),
100     DumpCode(false),
101 
102     FP64(false),
103     IsGCN(false),
104     GCN1Encoding(false),
105     GCN3Encoding(false),
106     CIInsts(false),
107     SGPRInitBug(false),
108     HasSMemRealTime(false),
109     Has16BitInsts(false),
110     FlatAddressSpace(false),
111 
112     R600ALUInst(false),
113     CaymanISA(false),
114     CFALUBug(false),
115     HasVertexCache(false),
116     TexVTXClauseSize(0),
117 
118     FeatureDisable(false),
119     InstrItins(getInstrItineraryForCPU(GPU)) {
120   initializeSubtargetDependencies(TT, GPU, FS);
121 }
122 
123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
124 // size?
125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
126   switch (NWaves) {
127   case 10:
128     return 1638;
129   case 9:
130     return 1820;
131   case 8:
132     return 2048;
133   case 7:
134     return 2340;
135   case 6:
136     return 2730;
137   case 5:
138     return 3276;
139   case 4:
140     return 4096;
141   case 3:
142     return 5461;
143   case 2:
144     return 8192;
145   default:
146     return getLocalMemorySize();
147   }
148 }
149 
150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
151   if (Bytes <= 1638)
152     return 10;
153 
154   if (Bytes <= 1820)
155     return 9;
156 
157   if (Bytes <= 2048)
158     return 8;
159 
160   if (Bytes <= 2340)
161     return 7;
162 
163   if (Bytes <= 2730)
164     return 6;
165 
166   if (Bytes <= 3276)
167     return 5;
168 
169   if (Bytes <= 4096)
170     return 4;
171 
172   if (Bytes <= 5461)
173     return 3;
174 
175   if (Bytes <= 8192)
176     return 2;
177 
178   return 1;
179 }
180 
181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
182                              const TargetMachine &TM) :
183   AMDGPUSubtarget(TT, GPU, FS, TM),
184   InstrInfo(*this),
185   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
186   TLInfo(TM, *this) {}
187 
188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
189                          const TargetMachine &TM) :
190   AMDGPUSubtarget(TT, GPU, FS, TM),
191   InstrInfo(*this),
192   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
193   TLInfo(TM, *this),
194   GISel() {}
195 
196 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
197                                       unsigned NumRegionInstrs) const {
198   // Track register pressure so the scheduler can try to decrease
199   // pressure once register usage is above the threshold defined by
200   // SIRegisterInfo::getRegPressureSetLimit()
201   Policy.ShouldTrackPressure = true;
202 
203   // Enabling both top down and bottom up scheduling seems to give us less
204   // register spills than just using one of these approaches on its own.
205   Policy.OnlyTopDown = false;
206   Policy.OnlyBottomUp = false;
207 
208   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
209   if (!enableSIScheduler())
210     Policy.ShouldTrackLaneMasks = true;
211 }
212 
213 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
214   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
215 }
216