1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
34 
35 static const char *const DataLayoutStringSIPrivateIsZero =
36     "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
39 
40 static const char *const DataLayoutStringSIGenericIsZero =
41     "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
42     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
43     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
44 
45 static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = {
46     4, // Default
47     1, // opencl_global
48     3, // opencl_local
49     2, // opencl_constant
50     0, // opencl_private
51     4, // opencl_generic
52     1, // cuda_device
53     2, // cuda_constant
54     3  // cuda_shared
55 };
56 
57 static const LangASMap AMDGPUGenIsZeroDefIsGenMap = {
58     0, // Default
59     1, // opencl_global
60     3, // opencl_local
61     2, // opencl_constant
62     5, // opencl_private
63     0, // opencl_generic
64     1, // cuda_device
65     2, // cuda_constant
66     3  // cuda_shared
67 };
68 
69 static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = {
70     0, // Default
71     1, // opencl_global
72     3, // opencl_local
73     2, // opencl_constant
74     0, // opencl_private
75     4, // opencl_generic
76     1, // cuda_device
77     2, // cuda_constant
78     3  // cuda_shared
79 };
80 
81 static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = {
82     5, // Default
83     1, // opencl_global
84     3, // opencl_local
85     2, // opencl_constant
86     5, // opencl_private
87     0, // opencl_generic
88     1, // cuda_device
89     2, // cuda_constant
90     3  // cuda_shared
91 };
92 } // namespace targets
93 } // namespace clang
94 
95 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
96 #define BUILTIN(ID, TYPE, ATTRS)                                               \
97   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
98 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
99   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
100 #include "clang/Basic/BuiltinsAMDGPU.def"
101 };
102 
103 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
104   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
105   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
106   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
107   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
108   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
109   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
110   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
111   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
112   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
113   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
114   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
115   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
116   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
117   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
118   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
119   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
120   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
121   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
122   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
123   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
124   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
125   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
126   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
127   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
128   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
129   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
130   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
131   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
132   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
133   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
134   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
135   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
136   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
137   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
138   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
139   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
140   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
141   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
142   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
143   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
144   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
145   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
146   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
147   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
148   "flat_scratch_lo", "flat_scratch_hi"
149 };
150 
151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
152   return llvm::makeArrayRef(GCCRegNames);
153 }
154 
155 bool AMDGPUTargetInfo::initFeatureMap(
156     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
157     const std::vector<std::string> &FeatureVec) const {
158 
159   // XXX - What does the member GPU mean if device name string passed here?
160   if (getTriple().getArch() == llvm::Triple::amdgcn) {
161     if (CPU.empty())
162       CPU = "tahiti";
163 
164     switch (parseAMDGCNName(CPU)) {
165     case GK_GFX6:
166     case GK_GFX7:
167       break;
168 
169     case GK_GFX9:
170       Features["gfx9-insts"] = true;
171       LLVM_FALLTHROUGH;
172     case GK_GFX8:
173       Features["s-memrealtime"] = true;
174       Features["16-bit-insts"] = true;
175       Features["dpp"] = true;
176       break;
177 
178     case GK_NONE:
179       return false;
180     default:
181       llvm_unreachable("unhandled subtarget");
182     }
183   } else {
184     if (CPU.empty())
185       CPU = "r600";
186 
187     switch (parseR600Name(CPU)) {
188     case GK_R600:
189     case GK_R700:
190     case GK_EVERGREEN:
191     case GK_NORTHERN_ISLANDS:
192       break;
193     case GK_R600_DOUBLE_OPS:
194     case GK_R700_DOUBLE_OPS:
195     case GK_EVERGREEN_DOUBLE_OPS:
196     case GK_CAYMAN:
197       Features["fp64"] = true;
198       break;
199     case GK_NONE:
200       return false;
201     default:
202       llvm_unreachable("unhandled subtarget");
203     }
204   }
205 
206   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
207 }
208 
209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
210                                            TargetOptions &TargetOpts) const {
211   bool hasFP32Denormals = false;
212   bool hasFP64Denormals = false;
213   for (auto &I : TargetOpts.FeaturesAsWritten) {
214     if (I == "+fp32-denormals" || I == "-fp32-denormals")
215       hasFP32Denormals = true;
216     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
217       hasFP64Denormals = true;
218   }
219   if (!hasFP32Denormals)
220     TargetOpts.Features.push_back(
221         (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
222                    ? '+'
223                    : '-') +
224          Twine("fp32-denormals"))
225             .str());
226   // Always do not flush fp64 or fp16 denorms.
227   if (!hasFP64Denormals && hasFP64)
228     TargetOpts.Features.push_back("+fp64-fp16-denormals");
229 }
230 
231 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
232   return llvm::StringSwitch<GPUKind>(Name)
233       .Case("r600", GK_R600)
234       .Case("rv610", GK_R600)
235       .Case("rv620", GK_R600)
236       .Case("rv630", GK_R600)
237       .Case("rv635", GK_R600)
238       .Case("rs780", GK_R600)
239       .Case("rs880", GK_R600)
240       .Case("rv670", GK_R600_DOUBLE_OPS)
241       .Case("rv710", GK_R700)
242       .Case("rv730", GK_R700)
243       .Case("rv740", GK_R700_DOUBLE_OPS)
244       .Case("rv770", GK_R700_DOUBLE_OPS)
245       .Case("palm", GK_EVERGREEN)
246       .Case("cedar", GK_EVERGREEN)
247       .Case("sumo", GK_EVERGREEN)
248       .Case("sumo2", GK_EVERGREEN)
249       .Case("redwood", GK_EVERGREEN)
250       .Case("juniper", GK_EVERGREEN)
251       .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
252       .Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
253       .Case("barts", GK_NORTHERN_ISLANDS)
254       .Case("turks", GK_NORTHERN_ISLANDS)
255       .Case("caicos", GK_NORTHERN_ISLANDS)
256       .Case("cayman", GK_CAYMAN)
257       .Case("aruba", GK_CAYMAN)
258       .Default(GK_NONE);
259 }
260 
261 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
262   return llvm::StringSwitch<GPUKind>(Name)
263       .Case("gfx600", GK_GFX6)
264       .Case("tahiti", GK_GFX6)
265       .Case("gfx601", GK_GFX6)
266       .Case("pitcairn", GK_GFX6)
267       .Case("verde", GK_GFX6)
268       .Case("oland", GK_GFX6)
269       .Case("hainan", GK_GFX6)
270       .Case("gfx700", GK_GFX7)
271       .Case("bonaire", GK_GFX7)
272       .Case("kaveri", GK_GFX7)
273       .Case("gfx701", GK_GFX7)
274       .Case("hawaii", GK_GFX7)
275       .Case("gfx702", GK_GFX7)
276       .Case("gfx703", GK_GFX7)
277       .Case("kabini", GK_GFX7)
278       .Case("mullins", GK_GFX7)
279       .Case("gfx800", GK_GFX8)
280       .Case("iceland", GK_GFX8)
281       .Case("gfx801", GK_GFX8)
282       .Case("carrizo", GK_GFX8)
283       .Case("gfx802", GK_GFX8)
284       .Case("tonga", GK_GFX8)
285       .Case("gfx803", GK_GFX8)
286       .Case("fiji", GK_GFX8)
287       .Case("polaris10", GK_GFX8)
288       .Case("polaris11", GK_GFX8)
289       .Case("gfx804", GK_GFX8)
290       .Case("gfx810", GK_GFX8)
291       .Case("stoney", GK_GFX8)
292       .Case("gfx900", GK_GFX9)
293       .Case("gfx901", GK_GFX9)
294       .Case("gfx902", GK_GFX9)
295       .Case("gfx903", GK_GFX9)
296       .Default(GK_NONE);
297 }
298 
299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
300   if (isGenericZero(getTriple())) {
301     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
302                                     : &AMDGPUGenIsZeroDefIsGenMap;
303   } else {
304     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
305                                     : &AMDGPUPrivIsZeroDefIsGenMap;
306   }
307 }
308 
309 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
310                                    const TargetOptions &Opts)
311     : TargetInfo(Triple),
312       GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)),
313       hasFP64(false), hasFMAF(false), hasLDEXPF(false),
314       AS(isGenericZero(Triple)) {
315   if (getTriple().getArch() == llvm::Triple::amdgcn) {
316     hasFP64 = true;
317     hasFMAF = true;
318     hasLDEXPF = true;
319   }
320   if (getTriple().getArch() == llvm::Triple::r600) {
321     if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) {
322       hasFMAF = true;
323     }
324   }
325   auto IsGenericZero = isGenericZero(Triple);
326   resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
327                       ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
328                                        : DataLayoutStringSIPrivateIsZero)
329                       : DataLayoutStringR600);
330   assert(DataLayout->getAllocaAddrSpace() == AS.Private);
331 
332   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
333                      Triple.getEnvironment() == llvm::Triple::OpenCL ||
334                      Triple.getEnvironmentName() == "amdgizcl" ||
335                      !isAMDGCN(Triple));
336   UseAddrSpaceMapMangling = true;
337 
338   // Set pointer width and alignment for target address space 0.
339   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
340   if (getMaxPointerWidth() == 64) {
341     LongWidth = LongAlign = 64;
342     SizeType = UnsignedLong;
343     PtrDiffType = SignedLong;
344     IntPtrType = SignedLong;
345   }
346 
347   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
348 }
349 
350 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
351   TargetInfo::adjust(Opts);
352   setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
353 }
354 
355 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
356   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
357                                              Builtin::FirstTSBuiltin);
358 }
359 
360 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
361                                         MacroBuilder &Builder) const {
362   if (getTriple().getArch() == llvm::Triple::amdgcn)
363     Builder.defineMacro("__AMDGCN__");
364   else
365     Builder.defineMacro("__R600__");
366 
367   if (hasFMAF)
368     Builder.defineMacro("__HAS_FMAF__");
369   if (hasLDEXPF)
370     Builder.defineMacro("__HAS_LDEXPF__");
371   if (hasFP64)
372     Builder.defineMacro("__HAS_FP64__");
373 }
374