1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
namespace clang {
namespace targets {

// File-local constants used by AMDGPUTargetInfo: the LLVM data layout
// strings handed to resetDataLayout() and the address-space maps installed by
// setAddressSpaceMap().

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout used when the triple's architecture is not amdgcn (see the
// AMDGPUTargetInfo constructor). The only pointer specification is the
// default one, "p:32:32".
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";

// Data layout used for amdgcn triples for which isGenericZero() is false.
// The default pointer specification is "p:32:32"; address spaces 1, 2 and 4
// are given "64:64" and address spaces 3 and 5 are given "32:32".
static const char *const DataLayoutStringSIPrivateIsZero =
    "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";

// Data layout used for amdgcn triples for which isGenericZero() is true.
// The default pointer specification is "p:64:64"; address spaces 1 and 2 are
// given "64:64" and address spaces 3, 4 and 5 are given "32:32". The trailing
// "-A5" names address space 5 as the alloca address space; the constructor
// asserts that this agrees with AS.Private.
static const char *const DataLayoutStringSIGenericIsZero =
    "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";

// The four tables below are the candidates for AddrSpaceMap. Entries are
// positional; the trailing comment on each entry names the language address
// space it belongs to.
// NOTE(review): the values are presumably target address-space numbers that
// correspond to the pN entries of the data layout strings above -- confirm
// against the LangAS definition.

// opencl_private is 0; Default shares the value of opencl_generic (4).
static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = {
    4, // Default
    1, // opencl_global
    3, // opencl_local
    2, // opencl_constant
    0, // opencl_private
    4, // opencl_generic
    1, // cuda_device
    2, // cuda_constant
    3  // cuda_shared
};

// opencl_generic is 0; Default shares the value of opencl_generic (0).
static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = {
    0, // Default
    1, // opencl_global
    3, // opencl_local
    2, // opencl_constant
    5, // opencl_private
    0, // opencl_generic
    1, // cuda_device
    2, // cuda_constant
    3  // cuda_shared
};

// opencl_private is 0; Default shares the value of opencl_private (0).
static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = {
    0, // Default
    1, // opencl_global
    3, // opencl_local
    2, // opencl_constant
    0, // opencl_private
    4, // opencl_generic
    1, // cuda_device
    2, // cuda_constant
    3  // cuda_shared
};

// opencl_generic is 0; Default shares the value of opencl_private (5).
static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = {
    5, // Default
    1, // opencl_global
    3, // opencl_local
    2, // opencl_constant
    5, // opencl_private
    0, // opencl_generic
    1, // cuda_device
    2, // cuda_constant
    3  // cuda_shared
};
} // namespace targets
} // namespace clang
94 
// Table of AMDGPU builtins, exposed through getTargetBuiltins(). It is
// populated by including BuiltinsAMDGPU.def with BUILTIN and TARGET_BUILTIN
// defined so that every entry expands to one Builtin::Info initializer. The
// two macros differ only in the last field: BUILTIN passes nullptr there,
// TARGET_BUILTIN passes FEATURE.
// NOTE(review): the meaning of the individual fields is defined by
// Builtin::Info, which is not visible in this file -- confirm there before
// changing the initializer shape.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
102 
// Register name table returned by getGCCRegNames(). Lists "v0" through
// "v255", then "s0" through "s127", followed by the named registers exec,
// vcc, scc, m0 and flat_scratch and the _lo/_hi halves of exec, vcc and
// flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
150 
151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
152   return llvm::makeArrayRef(GCCRegNames);
153 }
154 
155 bool AMDGPUTargetInfo::initFeatureMap(
156     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
157     const std::vector<std::string> &FeatureVec) const {
158 
159   // XXX - What does the member GPU mean if device name string passed here?
160   if (getTriple().getArch() == llvm::Triple::amdgcn) {
161     if (CPU.empty())
162       CPU = "tahiti";
163 
164     switch (parseAMDGCNName(CPU)) {
165     case GK_GFX6:
166     case GK_GFX7:
167       break;
168 
169     case GK_GFX9:
170       Features["gfx9-insts"] = true;
171       LLVM_FALLTHROUGH;
172     case GK_GFX8:
173       Features["s-memrealtime"] = true;
174       Features["16-bit-insts"] = true;
175       Features["dpp"] = true;
176       break;
177 
178     case GK_NONE:
179       return false;
180     default:
181       llvm_unreachable("unhandled subtarget");
182     }
183   } else {
184     if (CPU.empty())
185       CPU = "r600";
186 
187     switch (parseR600Name(CPU)) {
188     case GK_R600:
189     case GK_R700:
190     case GK_EVERGREEN:
191     case GK_NORTHERN_ISLANDS:
192       break;
193     case GK_R600_DOUBLE_OPS:
194     case GK_R700_DOUBLE_OPS:
195     case GK_EVERGREEN_DOUBLE_OPS:
196     case GK_CAYMAN:
197       Features["fp64"] = true;
198       break;
199     case GK_NONE:
200       return false;
201     default:
202       llvm_unreachable("unhandled subtarget");
203     }
204   }
205 
206   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
207 }
208 
209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
210                                            TargetOptions &TargetOpts) const {
211   bool hasFP32Denormals = false;
212   bool hasFP64Denormals = false;
213   for (auto &I : TargetOpts.FeaturesAsWritten) {
214     if (I == "+fp32-denormals" || I == "-fp32-denormals")
215       hasFP32Denormals = true;
216     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
217       hasFP64Denormals = true;
218   }
219   if (!hasFP32Denormals)
220     TargetOpts.Features.push_back(
221         (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
222                    ? '+'
223                    : '-') +
224          Twine("fp32-denormals"))
225             .str());
226   // Always do not flush fp64 or fp16 denorms.
227   if (!hasFP64Denormals && hasFP64)
228     TargetOpts.Features.push_back("+fp64-fp16-denormals");
229 }
230 
231 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
232   return llvm::StringSwitch<GPUKind>(Name)
233       .Case("r600", GK_R600)
234       .Case("rv610", GK_R600)
235       .Case("rv620", GK_R600)
236       .Case("rv630", GK_R600)
237       .Case("rv635", GK_R600)
238       .Case("rs780", GK_R600)
239       .Case("rs880", GK_R600)
240       .Case("rv670", GK_R600_DOUBLE_OPS)
241       .Case("rv710", GK_R700)
242       .Case("rv730", GK_R700)
243       .Case("rv740", GK_R700_DOUBLE_OPS)
244       .Case("rv770", GK_R700_DOUBLE_OPS)
245       .Case("palm", GK_EVERGREEN)
246       .Case("cedar", GK_EVERGREEN)
247       .Case("sumo", GK_EVERGREEN)
248       .Case("sumo2", GK_EVERGREEN)
249       .Case("redwood", GK_EVERGREEN)
250       .Case("juniper", GK_EVERGREEN)
251       .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
252       .Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
253       .Case("barts", GK_NORTHERN_ISLANDS)
254       .Case("turks", GK_NORTHERN_ISLANDS)
255       .Case("caicos", GK_NORTHERN_ISLANDS)
256       .Case("cayman", GK_CAYMAN)
257       .Case("aruba", GK_CAYMAN)
258       .Default(GK_NONE);
259 }
260 
261 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
262   return llvm::StringSwitch<GPUKind>(Name)
263       .Case("gfx600", GK_GFX6)
264       .Case("tahiti", GK_GFX6)
265       .Case("gfx601", GK_GFX6)
266       .Case("pitcairn", GK_GFX6)
267       .Case("verde", GK_GFX6)
268       .Case("oland", GK_GFX6)
269       .Case("hainan", GK_GFX6)
270       .Case("gfx700", GK_GFX7)
271       .Case("bonaire", GK_GFX7)
272       .Case("kaveri", GK_GFX7)
273       .Case("gfx701", GK_GFX7)
274       .Case("hawaii", GK_GFX7)
275       .Case("gfx702", GK_GFX7)
276       .Case("gfx703", GK_GFX7)
277       .Case("kabini", GK_GFX7)
278       .Case("mullins", GK_GFX7)
279       .Case("gfx800", GK_GFX8)
280       .Case("iceland", GK_GFX8)
281       .Case("gfx801", GK_GFX8)
282       .Case("carrizo", GK_GFX8)
283       .Case("gfx802", GK_GFX8)
284       .Case("tonga", GK_GFX8)
285       .Case("gfx803", GK_GFX8)
286       .Case("fiji", GK_GFX8)
287       .Case("polaris10", GK_GFX8)
288       .Case("polaris11", GK_GFX8)
289       .Case("gfx804", GK_GFX8)
290       .Case("gfx810", GK_GFX8)
291       .Case("stoney", GK_GFX8)
292       .Case("gfx900", GK_GFX9)
293       .Case("gfx901", GK_GFX9)
294       .Case("gfx902", GK_GFX9)
295       .Case("gfx903", GK_GFX9)
296       .Default(GK_NONE);
297 }
298 
299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
300   if (isGenericZero(getTriple())) {
301     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
302                                     : &AMDGPUGenIsZeroDefIsGenMap;
303   } else {
304     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
305                                     : &AMDGPUPrivIsZeroDefIsGenMap;
306   }
307 }
308 
309 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
310                                    const TargetOptions &Opts)
311     : TargetInfo(Triple), GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),
312       hasFP64(false), hasFMAF(false), hasLDEXPF(false),
313       AS(isGenericZero(Triple)) {
314   if (getTriple().getArch() == llvm::Triple::amdgcn) {
315     hasFP64 = true;
316     hasFMAF = true;
317     hasLDEXPF = true;
318   }
319   auto IsGenericZero = isGenericZero(Triple);
320   resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
321                       ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
322                                        : DataLayoutStringSIPrivateIsZero)
323                       : DataLayoutStringR600);
324   assert(DataLayout->getAllocaAddrSpace() == AS.Private);
325 
326   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
327                      Triple.getEnvironment() == llvm::Triple::OpenCL ||
328                      Triple.getEnvironmentName() == "amdgizcl" ||
329                      !isAMDGCN(Triple));
330   UseAddrSpaceMapMangling = true;
331 
332   // Set pointer width and alignment for target address space 0.
333   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
334   if (getMaxPointerWidth() == 64) {
335     LongWidth = LongAlign = 64;
336     SizeType = UnsignedLong;
337     PtrDiffType = SignedLong;
338     IntPtrType = SignedLong;
339   }
340 
341   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
342 }
343 
344 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
345   TargetInfo::adjust(Opts);
346   setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
347 }
348 
349 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
350   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
351                                              Builtin::FirstTSBuiltin);
352 }
353 
354 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
355                                         MacroBuilder &Builder) const {
356   if (getTriple().getArch() == llvm::Triple::amdgcn)
357     Builder.defineMacro("__AMDGCN__");
358   else
359     Builder.defineMacro("__R600__");
360 
361   if (hasFMAF)
362     Builder.defineMacro("__HAS_FMAF__");
363   if (hasLDEXPF)
364     Builder.defineMacro("__HAS_LDEXPF__");
365   if (hasFP64)
366     Builder.defineMacro("__HAS_FP64__");
367 }
368