1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUSubtarget.h"
16 #include "AMDGPUCallLowering.h"
17 #include "R600ISelLowering.h"
18 #include "R600InstrInfo.h"
19 #include "R600MachineScheduler.h"
20 #include "SIFrameLowering.h"
21 #include "SIISelLowering.h"
22 #include "SIInstrInfo.h"
23 #include "SIMachineFunctionInfo.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/CodeGen/MachineScheduler.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "amdgpu-subtarget"
30 
31 #define GET_SUBTARGETINFO_ENUM
32 #define GET_SUBTARGETINFO_TARGET_DESC
33 #define GET_SUBTARGETINFO_CTOR
34 #include "AMDGPUGenSubtargetInfo.inc"
35 
36 #ifdef LLVM_BUILD_GLOBAL_ISEL
37 namespace {
38 struct AMDGPUGISelActualAccessor : public GISelAccessor {
39   std::unique_ptr<CallLowering> CallLoweringInfo;
40   const CallLowering *getCallLowering() const override {
41     return CallLoweringInfo.get();
42   }
43 };
44 } // End anonymous namespace.
45 #endif
46 
47 AMDGPUSubtarget &
48 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
49                                                  StringRef GPU, StringRef FS) {
50   // Determine default and user-specified characteristics
51   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
52   // enabled, but some instructions do not respect them and they run at the
53   // double precision rate, so don't enable by default.
54   //
55   // We want to be able to turn these off, but making this a subtarget feature
56   // for SI has the unhelpful behavior that it unsets everything else if you
57   // disable it.
58 
59   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
60   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
61     FullFS += "+flat-for-global,";
62   FullFS += FS;
63 
64   ParseSubtargetFeatures(GPU, FullFS);
65 
66   // FIXME: I don't think think Evergreen has any useful support for
67   // denormals, but should be checked. Should we issue a warning somewhere
68   // if someone tries to enable these?
69   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
70     FP32Denormals = false;
71     FP64Denormals = false;
72   }
73 
74   // Set defaults if needed.
75   if (MaxPrivateElementSize == 0)
76     MaxPrivateElementSize = 4;
77 
78   return *this;
79 }
80 
81 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
82                                  TargetMachine &TM)
83     : AMDGPUGenSubtargetInfo(TT, GPU, FS),
84       DumpCode(false), R600ALUInst(false), HasVertexCache(false),
85       TexVTXClauseSize(0),
86       Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
87       FP64(false),
88       FP64Denormals(false), FP32Denormals(false), FPExceptions(false),
89       FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false),
90       FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true),
91       EnablePromoteAlloca(false),
92       EnableIfCvt(true), EnableLoadStoreOpt(false),
93       EnableUnsafeDSOffsetFolding(false),
94       EnableXNACK(false),
95       WavefrontSize(64), CFALUBug(false),
96       LocalMemorySize(0), MaxPrivateElementSize(0),
97       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
98       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
99       HasSMemRealTime(false), Has16BitInsts(false),
100       LDSBankCount(0),
101       IsaVersion(ISAVersion0_0_0),
102       EnableSIScheduler(false),
103       DebuggerInsertNops(false), DebuggerReserveRegs(false),
104       FrameLowering(nullptr),
105       GISel(),
106       InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
107 
108   initializeSubtargetDependencies(TT, GPU, FS);
109 
110   const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16)
111 
112   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
113     InstrInfo.reset(new R600InstrInfo(*this));
114     TLInfo.reset(new R600TargetLowering(TM, *this));
115 
116     // FIXME: Should have R600 specific FrameLowering
117     FrameLowering.reset(new AMDGPUFrameLowering(
118                           TargetFrameLowering::StackGrowsUp,
119                           MaxStackAlign,
120                           0));
121   } else {
122     InstrInfo.reset(new SIInstrInfo(*this));
123     TLInfo.reset(new SITargetLowering(TM, *this));
124     FrameLowering.reset(new SIFrameLowering(
125                           TargetFrameLowering::StackGrowsUp,
126                           MaxStackAlign,
127                           0));
128 #ifndef LLVM_BUILD_GLOBAL_ISEL
129     GISelAccessor *GISel = new GISelAccessor();
130 #else
131     AMDGPUGISelActualAccessor *GISel =
132         new AMDGPUGISelActualAccessor();
133     GISel->CallLoweringInfo.reset(
134         new AMDGPUCallLowering(*getTargetLowering()));
135 #endif
136     setGISelAccessor(*GISel);
137   }
138 }
139 
140 const CallLowering *AMDGPUSubtarget::getCallLowering() const {
141   assert(GISel && "Access to GlobalISel APIs not set");
142   return GISel->getCallLowering();
143 }
144 
145 unsigned AMDGPUSubtarget::getStackEntrySize() const {
146   assert(getGeneration() <= NORTHERN_ISLANDS);
147   switch(getWavefrontSize()) {
148   case 16:
149     return 8;
150   case 32:
151     return hasCaymanISA() ? 4 : 8;
152   case 64:
153     return 4;
154   default:
155     llvm_unreachable("Illegal wavefront size.");
156   }
157 }
158 
159 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
160 // size?
161 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
162   switch (NWaves) {
163   case 10:
164     return 1638;
165   case 9:
166     return 1820;
167   case 8:
168     return 2048;
169   case 7:
170     return 2340;
171   case 6:
172     return 2730;
173   case 5:
174     return 3276;
175   case 4:
176     return 4096;
177   case 3:
178     return 5461;
179   case 2:
180     return 8192;
181   default:
182     return getLocalMemorySize();
183   }
184 }
185 
186 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
187   if (Bytes <= 1638)
188     return 10;
189 
190   if (Bytes <= 1820)
191     return 9;
192 
193   if (Bytes <= 2048)
194     return 8;
195 
196   if (Bytes <= 2340)
197     return 7;
198 
199   if (Bytes <= 2730)
200     return 6;
201 
202   if (Bytes <= 3276)
203     return 5;
204 
205   if (Bytes <= 4096)
206     return 4;
207 
208   if (Bytes <= 5461)
209     return 3;
210 
211   if (Bytes <= 8192)
212     return 2;
213 
214   return 1;
215 }
216 
217 unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
218   switch(getGeneration()) {
219   default: llvm_unreachable("ChipID unknown");
220   case SEA_ISLANDS: return 12;
221   }
222 }
223 
224 AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
225   return AMDGPU::getIsaVersion(getFeatureBits());
226 }
227 
228 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const {
229   return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling;
230 }
231 
232 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
233                                           MachineInstr *begin,
234                                           MachineInstr *end,
235                                           unsigned NumRegionInstrs) const {
236   if (getGeneration() >= SOUTHERN_ISLANDS) {
237 
238     // Track register pressure so the scheduler can try to decrease
239     // pressure once register usage is above the threshold defined by
240     // SIRegisterInfo::getRegPressureSetLimit()
241     Policy.ShouldTrackPressure = true;
242 
243     // Enabling both top down and bottom up scheduling seems to give us less
244     // register spills than just using one of these approaches on its own.
245     Policy.OnlyTopDown = false;
246     Policy.OnlyBottomUp = false;
247 
248     // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
249     if (!enableSIScheduler())
250       Policy.ShouldTrackLaneMasks = true;
251   }
252 }
253 
254