1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
33 
// LLVM data-layout description used for amdgcn triples. The adjacent
// literals concatenate into a single string; the per-piece notes follow the
// LLVM LangRef data-layout grammar.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64" // little-endian, 64-bit pointers in address space 0
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-AS pointers
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128" // vector alignments
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1" // native ints, stack align, alloca AS, globals AS
    "-ni:7";            // non-integral pointer address space
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // opencl_global_device
48     Global,   // opencl_global_host
49     Global,   // cuda_device
50     Constant, // cuda_constant
51     Local,    // cuda_shared
52     Global,   // sycl_global
53     Global,   // sycl_global_device
54     Global,   // sycl_global_host
55     Local,    // sycl_local
56     Private,  // sycl_private
57     Generic,  // ptr32_sptr
58     Generic,  // ptr32_uptr
59     Generic   // ptr64
60 };
61 
62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63     Private,  // Default
64     Global,   // opencl_global
65     Local,    // opencl_local
66     Constant, // opencl_constant
67     Private,  // opencl_private
68     Generic,  // opencl_generic
69     Global,   // opencl_global_device
70     Global,   // opencl_global_host
71     Global,   // cuda_device
72     Constant, // cuda_constant
73     Local,    // cuda_shared
74     // SYCL address space values for this map are dummy
75     Generic,  // sycl_global
76     Generic,  // sycl_global_device
77     Generic,  // sycl_global_host
78     Generic,  // sycl_local
79     Generic,  // sycl_private
80     Generic,  // ptr32_sptr
81     Generic,  // ptr32_uptr
82     Generic   // ptr64
83 
84 };
85 } // namespace targets
86 } // namespace clang
87 
// Table of AMDGPU builtin descriptors, generated by expanding every entry of
// clang/Basic/BuiltinsAMDGPU.def through the two macros defined below.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// BUILTIN: stringified ID, TYPE and ATTRS, with nullptr in the final slot.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// TARGET_BUILTIN: identical to BUILTIN except that the final slot carries
// FEATURE (presumably the target feature the builtin requires -- confirm
// against the Builtin::Info declaration).
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
95 
96 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
97   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
98   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
99   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
100   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
101   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
102   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
103   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
104   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
105   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
106   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
107   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
108   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
109   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
110   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
111   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
112   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
113   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
114   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
115   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
116   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
117   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
118   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
119   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
120   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
121   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
122   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
123   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
124   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
125   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
126   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
127   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
128   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
129   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
130   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
131   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
132   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
133   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
134   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
135   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
136   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
137   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
138   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
139   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
140   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
141   "flat_scratch_lo", "flat_scratch_hi",
142   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
143   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
144   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
145   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
146   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
147   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
148   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
149   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
150   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
151   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
152   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
153   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
154   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
155   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
156   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
157   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
158   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
159   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
160   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
161   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
162   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
163   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
164   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
165   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
166   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
167   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
168   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
169   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
170   "a252", "a253", "a254", "a255"
171 };
172 
173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
174   return llvm::makeArrayRef(GCCRegNames);
175 }
176 
177 bool AMDGPUTargetInfo::initFeatureMap(
178     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
179     const std::vector<std::string> &FeatureVec) const {
180 
181   using namespace llvm::AMDGPU;
182 
183   // XXX - What does the member GPU mean if device name string passed here?
184   if (isAMDGCN(getTriple())) {
185     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
186     case GK_GFX1036:
187     case GK_GFX1035:
188     case GK_GFX1034:
189     case GK_GFX1033:
190     case GK_GFX1032:
191     case GK_GFX1031:
192     case GK_GFX1030:
193       Features["ci-insts"] = true;
194       Features["dot1-insts"] = true;
195       Features["dot2-insts"] = true;
196       Features["dot5-insts"] = true;
197       Features["dot6-insts"] = true;
198       Features["dot7-insts"] = true;
199       Features["dl-insts"] = true;
200       Features["flat-address-space"] = true;
201       Features["16-bit-insts"] = true;
202       Features["dpp"] = true;
203       Features["gfx8-insts"] = true;
204       Features["gfx9-insts"] = true;
205       Features["gfx10-insts"] = true;
206       Features["gfx10-3-insts"] = true;
207       Features["s-memrealtime"] = true;
208       Features["s-memtime-inst"] = true;
209       break;
210     case GK_GFX1012:
211     case GK_GFX1011:
212       Features["dot1-insts"] = true;
213       Features["dot2-insts"] = true;
214       Features["dot5-insts"] = true;
215       Features["dot6-insts"] = true;
216       Features["dot7-insts"] = true;
217       LLVM_FALLTHROUGH;
218     case GK_GFX1013:
219     case GK_GFX1010:
220       Features["dl-insts"] = true;
221       Features["ci-insts"] = true;
222       Features["flat-address-space"] = true;
223       Features["16-bit-insts"] = true;
224       Features["dpp"] = true;
225       Features["gfx8-insts"] = true;
226       Features["gfx9-insts"] = true;
227       Features["gfx10-insts"] = true;
228       Features["s-memrealtime"] = true;
229       Features["s-memtime-inst"] = true;
230       break;
231     case GK_GFX940:
232       Features["gfx940-insts"] = true;
233       LLVM_FALLTHROUGH;
234     case GK_GFX90A:
235       Features["gfx90a-insts"] = true;
236       LLVM_FALLTHROUGH;
237     case GK_GFX908:
238       Features["dot3-insts"] = true;
239       Features["dot4-insts"] = true;
240       Features["dot5-insts"] = true;
241       Features["dot6-insts"] = true;
242       Features["mai-insts"] = true;
243       LLVM_FALLTHROUGH;
244     case GK_GFX906:
245       Features["dl-insts"] = true;
246       Features["dot1-insts"] = true;
247       Features["dot2-insts"] = true;
248       Features["dot7-insts"] = true;
249       LLVM_FALLTHROUGH;
250     case GK_GFX90C:
251     case GK_GFX909:
252     case GK_GFX904:
253     case GK_GFX902:
254     case GK_GFX900:
255       Features["gfx9-insts"] = true;
256       LLVM_FALLTHROUGH;
257     case GK_GFX810:
258     case GK_GFX805:
259     case GK_GFX803:
260     case GK_GFX802:
261     case GK_GFX801:
262       Features["gfx8-insts"] = true;
263       Features["16-bit-insts"] = true;
264       Features["dpp"] = true;
265       Features["s-memrealtime"] = true;
266       LLVM_FALLTHROUGH;
267     case GK_GFX705:
268     case GK_GFX704:
269     case GK_GFX703:
270     case GK_GFX702:
271     case GK_GFX701:
272     case GK_GFX700:
273       Features["ci-insts"] = true;
274       Features["flat-address-space"] = true;
275       LLVM_FALLTHROUGH;
276     case GK_GFX602:
277     case GK_GFX601:
278     case GK_GFX600:
279       Features["s-memtime-inst"] = true;
280       break;
281     case GK_NONE:
282       break;
283     default:
284       llvm_unreachable("Unhandled GPU!");
285     }
286   } else {
287     if (CPU.empty())
288       CPU = "r600";
289 
290     switch (llvm::AMDGPU::parseArchR600(CPU)) {
291     case GK_CAYMAN:
292     case GK_CYPRESS:
293     case GK_RV770:
294     case GK_RV670:
295       // TODO: Add fp64 when implemented.
296       break;
297     case GK_TURKS:
298     case GK_CAICOS:
299     case GK_BARTS:
300     case GK_SUMO:
301     case GK_REDWOOD:
302     case GK_JUNIPER:
303     case GK_CEDAR:
304     case GK_RV730:
305     case GK_RV710:
306     case GK_RS880:
307     case GK_R630:
308     case GK_R600:
309       break;
310     default:
311       llvm_unreachable("Unhandled GPU!");
312     }
313   }
314 
315   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
316 }
317 
318 void AMDGPUTargetInfo::fillValidCPUList(
319     SmallVectorImpl<StringRef> &Values) const {
320   if (isAMDGCN(getTriple()))
321     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
322   else
323     llvm::AMDGPU::fillValidArchListR600(Values);
324 }
325 
326 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
327   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
328 }
329 
330 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
331                                    const TargetOptions &Opts)
332     : TargetInfo(Triple),
333       GPUKind(isAMDGCN(Triple) ?
334               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
335               llvm::AMDGPU::parseArchR600(Opts.CPU)),
336       GPUFeatures(isAMDGCN(Triple) ?
337                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
338                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
339   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
340                                         : DataLayoutStringR600);
341 
342   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
343                      !isAMDGCN(Triple));
344   UseAddrSpaceMapMangling = true;
345 
346   HasLegalHalfType = true;
347   HasFloat16 = true;
348   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
349   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
350 
351   // Set pointer width and alignment for target address space 0.
352   PointerWidth = PointerAlign = getPointerWidthV(Generic);
353   if (getMaxPointerWidth() == 64) {
354     LongWidth = LongAlign = 64;
355     SizeType = UnsignedLong;
356     PtrDiffType = SignedLong;
357     IntPtrType = SignedLong;
358   }
359 
360   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
361 }
362 
363 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
364   TargetInfo::adjust(Diags, Opts);
365   // ToDo: There are still a few places using default address space as private
366   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
367   // can be removed from the following line.
368   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
369                      !isAMDGCN(getTriple()));
370 }
371 
372 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
373   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
374                                              Builtin::FirstTSBuiltin);
375 }
376 
377 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
378                                         MacroBuilder &Builder) const {
379   Builder.defineMacro("__AMD__");
380   Builder.defineMacro("__AMDGPU__");
381 
382   if (isAMDGCN(getTriple()))
383     Builder.defineMacro("__AMDGCN__");
384   else
385     Builder.defineMacro("__R600__");
386 
387   if (GPUKind != llvm::AMDGPU::GK_NONE) {
388     StringRef CanonName = isAMDGCN(getTriple()) ?
389       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
390     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
391     if (isAMDGCN(getTriple())) {
392       Builder.defineMacro("__amdgcn_processor__",
393                           Twine("\"") + Twine(CanonName) + Twine("\""));
394       Builder.defineMacro("__amdgcn_target_id__",
395                           Twine("\"") + Twine(getTargetID().getValue()) +
396                               Twine("\""));
397       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
398         auto Loc = OffloadArchFeatures.find(F);
399         if (Loc != OffloadArchFeatures.end()) {
400           std::string NewF = F.str();
401           std::replace(NewF.begin(), NewF.end(), '-', '_');
402           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
403                                   Twine("__"),
404                               Loc->second ? "1" : "0");
405         }
406       }
407     }
408   }
409 
410   if (AllowAMDGPUUnsafeFPAtomics)
411     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
412 
413   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
414   // removed in the near future.
415   if (hasFMAF())
416     Builder.defineMacro("__HAS_FMAF__");
417   if (hasFastFMAF())
418     Builder.defineMacro("FP_FAST_FMAF");
419   if (hasLDEXPF())
420     Builder.defineMacro("__HAS_LDEXPF__");
421   if (hasFP64())
422     Builder.defineMacro("__HAS_FP64__");
423   if (hasFastFMA())
424     Builder.defineMacro("FP_FAST_FMA");
425 
426   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
427 }
428 
429 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
430   assert(HalfFormat == Aux->HalfFormat);
431   assert(FloatFormat == Aux->FloatFormat);
432   assert(DoubleFormat == Aux->DoubleFormat);
433 
434   // On x86_64 long double is 80-bit extended precision format, which is
435   // not supported by AMDGPU. 128-bit floating point format is also not
436   // supported by AMDGPU. Therefore keep its own format for these two types.
437   auto SaveLongDoubleFormat = LongDoubleFormat;
438   auto SaveFloat128Format = Float128Format;
439   copyAuxTarget(Aux);
440   LongDoubleFormat = SaveLongDoubleFormat;
441   Float128Format = SaveFloat128Format;
442   // For certain builtin types support on the host target, claim they are
443   // support to pass the compilation of the host code during the device-side
444   // compilation.
445   // FIXME: As the side effect, we also accept `__float128` uses in the device
446   // code. To rejct these builtin types supported in the host target but not in
447   // the device target, one approach would support `device_builtin` attribute
448   // so that we could tell the device builtin types from the host ones. The
449   // also solves the different representations of the same builtin type, such
450   // as `size_t` in the MSVC environment.
451   if (Aux->hasFloat128Type()) {
452     HasFloat128 = true;
453     Float128Format = DoubleFormat;
454   }
455 }
456