1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// Data layout description used for non-AMDGCN (R600) triples. The string is
// assembled from adjacent literals, one group of specifications per line; the
// group labels follow the LLVM LangRef data layout syntax.
static const char *const DataLayoutStringR600 =
    "e"                                    // endianness
    "-p:32:32"                             // default pointer size/alignment
    "-i64:64"                              // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128" // vector alignments
    "-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"; // native widths, stack align, alloca/global AS
34 
// Data layout description used for AMDGCN triples. The string is assembled
// from adjacent literals, one group of specifications per line; the group
// labels follow the LLVM LangRef data layout syntax.
static const char *const DataLayoutStringAMDGCN =
    "e"                                    // endianness
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32"  // per-address-space pointers
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"                              // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128" // vector alignments
    "-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1" // native widths, stack align, alloca/global AS
    "-ni:7";            // non-integral address space
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // opencl_global_device
49     Global,   // opencl_global_host
50     Global,   // cuda_device
51     Constant, // cuda_constant
52     Local,    // cuda_shared
53     Global,   // sycl_global
54     Local,    // sycl_local
55     Private,  // sycl_private
56     Generic,  // ptr32_sptr
57     Generic,  // ptr32_uptr
58     Generic   // ptr64
59 };
60 
61 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
62     Private,  // Default
63     Global,   // opencl_global
64     Local,    // opencl_local
65     Constant, // opencl_constant
66     Private,  // opencl_private
67     Generic,  // opencl_generic
68     Global,   // opencl_global_device
69     Global,   // opencl_global_host
70     Global,   // cuda_device
71     Constant, // cuda_constant
72     Local,    // cuda_shared
73     // SYCL address space values for this map are dummy
74     Generic,  // sycl_global
75     Generic,  // sycl_local
76     Generic,  // sycl_private
77     Generic,  // ptr32_sptr
78     Generic,  // ptr32_uptr
79     Generic   // ptr64
80 
81 };
82 } // namespace targets
83 } // namespace clang
84 
85 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
86 #define BUILTIN(ID, TYPE, ATTRS)                                               \
87   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
88 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
89   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
90 #include "clang/Basic/BuiltinsAMDGPU.def"
91 };
92 
93 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
94   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
95   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
96   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
97   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
98   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
99   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
100   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
101   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
102   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
103   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
104   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
105   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
106   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
107   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
108   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
109   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
110   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
111   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
112   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
113   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
114   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
115   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
116   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
117   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
118   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
119   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
120   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
121   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
122   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
123   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
124   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
125   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
126   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
127   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
128   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
129   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
130   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
131   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
132   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
133   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
134   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
135   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
136   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
137   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
138   "flat_scratch_lo", "flat_scratch_hi",
139   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
140   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
141   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
142   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
143   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
144   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
145   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
146   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
147   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
148   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
149   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
150   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
151   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
152   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
153   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
154   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
155   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
156   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
157   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
158   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
159   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
160   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
161   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
162   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
163   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
164   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
165   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
166   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
167   "a252", "a253", "a254", "a255"
168 };
169 
170 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
171   return llvm::makeArrayRef(GCCRegNames);
172 }
173 
174 bool AMDGPUTargetInfo::initFeatureMap(
175     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
176     const std::vector<std::string> &FeatureVec) const {
177 
178   using namespace llvm::AMDGPU;
179 
180   // XXX - What does the member GPU mean if device name string passed here?
181   if (isAMDGCN(getTriple())) {
182     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
183     case GK_GFX1034:
184     case GK_GFX1033:
185     case GK_GFX1032:
186     case GK_GFX1031:
187     case GK_GFX1030:
188       Features["ci-insts"] = true;
189       Features["dot1-insts"] = true;
190       Features["dot2-insts"] = true;
191       Features["dot5-insts"] = true;
192       Features["dot6-insts"] = true;
193       Features["dot7-insts"] = true;
194       Features["dl-insts"] = true;
195       Features["flat-address-space"] = true;
196       Features["16-bit-insts"] = true;
197       Features["dpp"] = true;
198       Features["gfx8-insts"] = true;
199       Features["gfx9-insts"] = true;
200       Features["gfx10-insts"] = true;
201       Features["gfx10-3-insts"] = true;
202       Features["s-memrealtime"] = true;
203       Features["s-memtime-inst"] = true;
204       break;
205     case GK_GFX1012:
206     case GK_GFX1011:
207       Features["dot1-insts"] = true;
208       Features["dot2-insts"] = true;
209       Features["dot5-insts"] = true;
210       Features["dot6-insts"] = true;
211       Features["dot7-insts"] = true;
212       LLVM_FALLTHROUGH;
213     case GK_GFX1010:
214       Features["dl-insts"] = true;
215       Features["ci-insts"] = true;
216       Features["flat-address-space"] = true;
217       Features["16-bit-insts"] = true;
218       Features["dpp"] = true;
219       Features["gfx8-insts"] = true;
220       Features["gfx9-insts"] = true;
221       Features["gfx10-insts"] = true;
222       Features["s-memrealtime"] = true;
223       Features["s-memtime-inst"] = true;
224       break;
225     case GK_GFX90A:
226       Features["gfx90a-insts"] = true;
227       LLVM_FALLTHROUGH;
228     case GK_GFX908:
229       Features["dot3-insts"] = true;
230       Features["dot4-insts"] = true;
231       Features["dot5-insts"] = true;
232       Features["dot6-insts"] = true;
233       Features["mai-insts"] = true;
234       LLVM_FALLTHROUGH;
235     case GK_GFX906:
236       Features["dl-insts"] = true;
237       Features["dot1-insts"] = true;
238       Features["dot2-insts"] = true;
239       Features["dot7-insts"] = true;
240       LLVM_FALLTHROUGH;
241     case GK_GFX90C:
242     case GK_GFX909:
243     case GK_GFX904:
244     case GK_GFX902:
245     case GK_GFX900:
246       Features["gfx9-insts"] = true;
247       LLVM_FALLTHROUGH;
248     case GK_GFX810:
249     case GK_GFX805:
250     case GK_GFX803:
251     case GK_GFX802:
252     case GK_GFX801:
253       Features["gfx8-insts"] = true;
254       Features["16-bit-insts"] = true;
255       Features["dpp"] = true;
256       Features["s-memrealtime"] = true;
257       LLVM_FALLTHROUGH;
258     case GK_GFX705:
259     case GK_GFX704:
260     case GK_GFX703:
261     case GK_GFX702:
262     case GK_GFX701:
263     case GK_GFX700:
264       Features["ci-insts"] = true;
265       Features["flat-address-space"] = true;
266       LLVM_FALLTHROUGH;
267     case GK_GFX602:
268     case GK_GFX601:
269     case GK_GFX600:
270       Features["s-memtime-inst"] = true;
271       break;
272     case GK_NONE:
273       break;
274     default:
275       llvm_unreachable("Unhandled GPU!");
276     }
277   } else {
278     if (CPU.empty())
279       CPU = "r600";
280 
281     switch (llvm::AMDGPU::parseArchR600(CPU)) {
282     case GK_CAYMAN:
283     case GK_CYPRESS:
284     case GK_RV770:
285     case GK_RV670:
286       // TODO: Add fp64 when implemented.
287       break;
288     case GK_TURKS:
289     case GK_CAICOS:
290     case GK_BARTS:
291     case GK_SUMO:
292     case GK_REDWOOD:
293     case GK_JUNIPER:
294     case GK_CEDAR:
295     case GK_RV730:
296     case GK_RV710:
297     case GK_RS880:
298     case GK_R630:
299     case GK_R600:
300       break;
301     default:
302       llvm_unreachable("Unhandled GPU!");
303     }
304   }
305 
306   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
307 }
308 
309 void AMDGPUTargetInfo::fillValidCPUList(
310     SmallVectorImpl<StringRef> &Values) const {
311   if (isAMDGCN(getTriple()))
312     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
313   else
314     llvm::AMDGPU::fillValidArchListR600(Values);
315 }
316 
317 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
318   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
319 }
320 
321 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
322                                    const TargetOptions &Opts)
323     : TargetInfo(Triple),
324       GPUKind(isAMDGCN(Triple) ?
325               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
326               llvm::AMDGPU::parseArchR600(Opts.CPU)),
327       GPUFeatures(isAMDGCN(Triple) ?
328                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
329                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
330   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
331                                         : DataLayoutStringR600);
332   GridValues = llvm::omp::AMDGPUGpuGridValues;
333 
334   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
335                      !isAMDGCN(Triple));
336   UseAddrSpaceMapMangling = true;
337 
338   HasLegalHalfType = true;
339   HasFloat16 = true;
340   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
341   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
342 
343   // Set pointer width and alignment for target address space 0.
344   PointerWidth = PointerAlign = getPointerWidthV(Generic);
345   if (getMaxPointerWidth() == 64) {
346     LongWidth = LongAlign = 64;
347     SizeType = UnsignedLong;
348     PtrDiffType = SignedLong;
349     IntPtrType = SignedLong;
350   }
351 
352   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
353 }
354 
355 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
356   TargetInfo::adjust(Opts);
357   // ToDo: There are still a few places using default address space as private
358   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
359   // can be removed from the following line.
360   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
361                      !isAMDGCN(getTriple()));
362 }
363 
364 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
365   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
366                                              Builtin::FirstTSBuiltin);
367 }
368 
369 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
370                                         MacroBuilder &Builder) const {
371   Builder.defineMacro("__AMD__");
372   Builder.defineMacro("__AMDGPU__");
373 
374   if (isAMDGCN(getTriple()))
375     Builder.defineMacro("__AMDGCN__");
376   else
377     Builder.defineMacro("__R600__");
378 
379   if (GPUKind != llvm::AMDGPU::GK_NONE) {
380     StringRef CanonName = isAMDGCN(getTriple()) ?
381       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
382     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
383     if (isAMDGCN(getTriple())) {
384       Builder.defineMacro("__amdgcn_processor__",
385                           Twine("\"") + Twine(CanonName) + Twine("\""));
386       Builder.defineMacro("__amdgcn_target_id__",
387                           Twine("\"") + Twine(getTargetID().getValue()) +
388                               Twine("\""));
389       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
390         auto Loc = OffloadArchFeatures.find(F);
391         if (Loc != OffloadArchFeatures.end()) {
392           std::string NewF = F.str();
393           std::replace(NewF.begin(), NewF.end(), '-', '_');
394           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
395                                   Twine("__"),
396                               Loc->second ? "1" : "0");
397         }
398       }
399     }
400   }
401 
402   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
403   // removed in the near future.
404   if (hasFMAF())
405     Builder.defineMacro("__HAS_FMAF__");
406   if (hasFastFMAF())
407     Builder.defineMacro("FP_FAST_FMAF");
408   if (hasLDEXPF())
409     Builder.defineMacro("__HAS_LDEXPF__");
410   if (hasFP64())
411     Builder.defineMacro("__HAS_FP64__");
412   if (hasFastFMA())
413     Builder.defineMacro("FP_FAST_FMA");
414 
415   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
416 }
417 
418 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
419   assert(HalfFormat == Aux->HalfFormat);
420   assert(FloatFormat == Aux->FloatFormat);
421   assert(DoubleFormat == Aux->DoubleFormat);
422 
423   // On x86_64 long double is 80-bit extended precision format, which is
424   // not supported by AMDGPU. 128-bit floating point format is also not
425   // supported by AMDGPU. Therefore keep its own format for these two types.
426   auto SaveLongDoubleFormat = LongDoubleFormat;
427   auto SaveFloat128Format = Float128Format;
428   copyAuxTarget(Aux);
429   LongDoubleFormat = SaveLongDoubleFormat;
430   Float128Format = SaveFloat128Format;
431   // For certain builtin types support on the host target, claim they are
432   // support to pass the compilation of the host code during the device-side
433   // compilation.
434   // FIXME: As the side effect, we also accept `__float128` uses in the device
435   // code. To rejct these builtin types supported in the host target but not in
436   // the device target, one approach would support `device_builtin` attribute
437   // so that we could tell the device builtin types from the host ones. The
438   // also solves the different representations of the same builtin type, such
439   // as `size_t` in the MSVC environment.
440   if (Aux->hasFloat128Type()) {
441     HasFloat128 = true;
442     Float128Format = DoubleFormat;
443   }
444 }
445