1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
32 static const char *const DataLayoutStringR600 =
33     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
35 
// LLVM data layout description used for amdgcn triples. One fragment per
// layout component (the pointer entries for the numbered address spaces are
// split over two lines); the concatenation is what gets handed to
// resetDataLayout().
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32"
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64"
    "-S32"
    "-A5"
    "-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1030:
178       Features["ci-insts"] = true;
179       Features["dot1-insts"] = true;
180       Features["dot2-insts"] = true;
181       Features["dot5-insts"] = true;
182       Features["dot6-insts"] = true;
183       Features["dl-insts"] = true;
184       Features["flat-address-space"] = true;
185       Features["16-bit-insts"] = true;
186       Features["dpp"] = true;
187       Features["gfx8-insts"] = true;
188       Features["gfx9-insts"] = true;
189       Features["gfx10-insts"] = true;
190       Features["gfx10-3-insts"] = true;
191       Features["s-memrealtime"] = true;
192       break;
193     case GK_GFX1012:
194     case GK_GFX1011:
195       Features["dot1-insts"] = true;
196       Features["dot2-insts"] = true;
197       Features["dot5-insts"] = true;
198       Features["dot6-insts"] = true;
199       LLVM_FALLTHROUGH;
200     case GK_GFX1010:
201       Features["dl-insts"] = true;
202       Features["ci-insts"] = true;
203       Features["flat-address-space"] = true;
204       Features["16-bit-insts"] = true;
205       Features["dpp"] = true;
206       Features["gfx8-insts"] = true;
207       Features["gfx9-insts"] = true;
208       Features["gfx10-insts"] = true;
209       Features["s-memrealtime"] = true;
210       break;
211     case GK_GFX908:
212       Features["dot3-insts"] = true;
213       Features["dot4-insts"] = true;
214       Features["dot5-insts"] = true;
215       Features["dot6-insts"] = true;
216       Features["mai-insts"] = true;
217       LLVM_FALLTHROUGH;
218     case GK_GFX906:
219       Features["dl-insts"] = true;
220       Features["dot1-insts"] = true;
221       Features["dot2-insts"] = true;
222       LLVM_FALLTHROUGH;
223     case GK_GFX909:
224     case GK_GFX904:
225     case GK_GFX902:
226     case GK_GFX900:
227       Features["gfx9-insts"] = true;
228       LLVM_FALLTHROUGH;
229     case GK_GFX810:
230     case GK_GFX803:
231     case GK_GFX802:
232     case GK_GFX801:
233       Features["gfx8-insts"] = true;
234       Features["16-bit-insts"] = true;
235       Features["dpp"] = true;
236       Features["s-memrealtime"] = true;
237       LLVM_FALLTHROUGH;
238     case GK_GFX704:
239     case GK_GFX703:
240     case GK_GFX702:
241     case GK_GFX701:
242     case GK_GFX700:
243       Features["ci-insts"] = true;
244       Features["flat-address-space"] = true;
245       LLVM_FALLTHROUGH;
246     case GK_GFX601:
247     case GK_GFX600:
248       break;
249     case GK_NONE:
250       break;
251     default:
252       llvm_unreachable("Unhandled GPU!");
253     }
254   } else {
255     if (CPU.empty())
256       CPU = "r600";
257 
258     switch (llvm::AMDGPU::parseArchR600(CPU)) {
259     case GK_CAYMAN:
260     case GK_CYPRESS:
261     case GK_RV770:
262     case GK_RV670:
263       // TODO: Add fp64 when implemented.
264       break;
265     case GK_TURKS:
266     case GK_CAICOS:
267     case GK_BARTS:
268     case GK_SUMO:
269     case GK_REDWOOD:
270     case GK_JUNIPER:
271     case GK_CEDAR:
272     case GK_RV730:
273     case GK_RV710:
274     case GK_RS880:
275     case GK_R630:
276     case GK_R600:
277       break;
278     default:
279       llvm_unreachable("Unhandled GPU!");
280     }
281   }
282 
283   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
284 }
285 
286 void AMDGPUTargetInfo::fillValidCPUList(
287     SmallVectorImpl<StringRef> &Values) const {
288   if (isAMDGCN(getTriple()))
289     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
290   else
291     llvm::AMDGPU::fillValidArchListR600(Values);
292 }
293 
294 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
295   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
296 }
297 
298 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
299                                    const TargetOptions &Opts)
300     : TargetInfo(Triple),
301       GPUKind(isAMDGCN(Triple) ?
302               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
303               llvm::AMDGPU::parseArchR600(Opts.CPU)),
304       GPUFeatures(isAMDGCN(Triple) ?
305                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
306                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
307   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
308                                         : DataLayoutStringR600);
309   assert(DataLayout->getAllocaAddrSpace() == Private);
310   GridValues = llvm::omp::AMDGPUGpuGridValues;
311 
312   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
313                      !isAMDGCN(Triple));
314   UseAddrSpaceMapMangling = true;
315 
316   HasLegalHalfType = true;
317   HasFloat16 = true;
318 
319   // Set pointer width and alignment for target address space 0.
320   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
321   if (getMaxPointerWidth() == 64) {
322     LongWidth = LongAlign = 64;
323     SizeType = UnsignedLong;
324     PtrDiffType = SignedLong;
325     IntPtrType = SignedLong;
326   }
327 
328   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
329 }
330 
331 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
332   TargetInfo::adjust(Opts);
333   // ToDo: There are still a few places using default address space as private
334   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
335   // can be removed from the following line.
336   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
337                      !isAMDGCN(getTriple()));
338 }
339 
340 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
341   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
342                                              Builtin::FirstTSBuiltin);
343 }
344 
345 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
346                                         MacroBuilder &Builder) const {
347   Builder.defineMacro("__AMD__");
348   Builder.defineMacro("__AMDGPU__");
349 
350   if (isAMDGCN(getTriple()))
351     Builder.defineMacro("__AMDGCN__");
352   else
353     Builder.defineMacro("__R600__");
354 
355   if (GPUKind != llvm::AMDGPU::GK_NONE) {
356     StringRef CanonName = isAMDGCN(getTriple()) ?
357       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
358     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
359   }
360 
361   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
362   // removed in the near future.
363   if (hasFMAF())
364     Builder.defineMacro("__HAS_FMAF__");
365   if (hasFastFMAF())
366     Builder.defineMacro("FP_FAST_FMAF");
367   if (hasLDEXPF())
368     Builder.defineMacro("__HAS_LDEXPF__");
369   if (hasFP64())
370     Builder.defineMacro("__HAS_FP64__");
371   if (hasFastFMA())
372     Builder.defineMacro("FP_FAST_FMA");
373 }
374 
375 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
376   assert(HalfFormat == Aux->HalfFormat);
377   assert(FloatFormat == Aux->FloatFormat);
378   assert(DoubleFormat == Aux->DoubleFormat);
379 
380   // On x86_64 long double is 80-bit extended precision format, which is
381   // not supported by AMDGPU. 128-bit floating point format is also not
382   // supported by AMDGPU. Therefore keep its own format for these two types.
383   auto SaveLongDoubleFormat = LongDoubleFormat;
384   auto SaveFloat128Format = Float128Format;
385   copyAuxTarget(Aux);
386   LongDoubleFormat = SaveLongDoubleFormat;
387   Float128Format = SaveFloat128Format;
388   // For certain builtin types support on the host target, claim they are
389   // support to pass the compilation of the host code during the device-side
390   // compilation.
391   // FIXME: As the side effect, we also accept `__float128` uses in the device
392   // code. To rejct these builtin types supported in the host target but not in
393   // the device target, one approach would support `device_builtin` attribute
394   // so that we could tell the device builtin types from the host ones. The
395   // also solves the different representations of the same builtin type, such
396   // as `size_t` in the MSVC environment.
397   if (Aux->hasFloat128Type()) {
398     HasFloat128 = true;
399     Float128Format = DoubleFormat;
400   }
401 }
402