1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
/// Data layout specification applied (via resetDataLayout in the constructor)
/// when the target triple is not AMDGCN. The adjacent literals below are
/// concatenated into a single string; they are split only to group related
/// components.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                              // pointer spec
    "-i64:64"                                                // integer spec
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" // vector specs
    "-v512:512-v1024:1024-v2048:2048"                        // vector specs
    "-n32:64-S32-A5-G1";                                     // n/S/A/G specs
33 
/// Data layout specification applied (via resetDataLayout in the constructor)
/// when the target triple is AMDGCN. The adjacent literals below are
/// concatenated into a single string; they are split only to group related
/// components.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                              // default pointer
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-AS pointers
    "-i64:64"                                                // integer spec
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" // vector specs
    "-v512:512-v1024:1024-v2048:2048"                        // vector specs
    "-n32:64-S32-A5-G1"                                      // n/S/A/G specs
    "-ni:7";                                                 // ni spec
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // opencl_global_device
48     Global,   // opencl_global_host
49     Global,   // cuda_device
50     Constant, // cuda_constant
51     Local,    // cuda_shared
52     Global,   // sycl_global
53     Global,   // sycl_global_device
54     Global,   // sycl_global_host
55     Local,    // sycl_local
56     Private,  // sycl_private
57     Generic,  // ptr32_sptr
58     Generic,  // ptr32_uptr
59     Generic   // ptr64
60 };
61 
62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63     Private,  // Default
64     Global,   // opencl_global
65     Local,    // opencl_local
66     Constant, // opencl_constant
67     Private,  // opencl_private
68     Generic,  // opencl_generic
69     Global,   // opencl_global_device
70     Global,   // opencl_global_host
71     Global,   // cuda_device
72     Constant, // cuda_constant
73     Local,    // cuda_shared
74     // SYCL address space values for this map are dummy
75     Generic,  // sycl_global
76     Generic,  // sycl_global_device
77     Generic,  // sycl_global_host
78     Generic,  // sycl_local
79     Generic,  // sycl_private
80     Generic,  // ptr32_sptr
81     Generic,  // ptr32_uptr
82     Generic   // ptr64
83 
84 };
85 } // namespace targets
86 } // namespace clang
87 
88 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
89 #define BUILTIN(ID, TYPE, ATTRS)                                               \
90   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
91 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
92   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
93 #include "clang/Basic/BuiltinsAMDGPU.def"
94 };
95 
96 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
97   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
98   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
99   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
100   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
101   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
102   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
103   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
104   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
105   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
106   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
107   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
108   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
109   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
110   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
111   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
112   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
113   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
114   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
115   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
116   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
117   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
118   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
119   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
120   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
121   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
122   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
123   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
124   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
125   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
126   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
127   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
128   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
129   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
130   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
131   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
132   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
133   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
134   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
135   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
136   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
137   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
138   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
139   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
140   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
141   "flat_scratch_lo", "flat_scratch_hi",
142   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
143   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
144   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
145   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
146   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
147   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
148   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
149   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
150   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
151   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
152   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
153   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
154   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
155   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
156   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
157   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
158   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
159   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
160   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
161   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
162   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
163   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
164   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
165   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
166   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
167   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
168   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
169   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
170   "a252", "a253", "a254", "a255"
171 };
172 
173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
174   return llvm::makeArrayRef(GCCRegNames);
175 }
176 
177 bool AMDGPUTargetInfo::initFeatureMap(
178     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
179     const std::vector<std::string> &FeatureVec) const {
180 
181   using namespace llvm::AMDGPU;
182 
183   // XXX - What does the member GPU mean if device name string passed here?
184   if (isAMDGCN(getTriple())) {
185     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
186     case GK_GFX1035:
187     case GK_GFX1034:
188     case GK_GFX1033:
189     case GK_GFX1032:
190     case GK_GFX1031:
191     case GK_GFX1030:
192       Features["ci-insts"] = true;
193       Features["dot1-insts"] = true;
194       Features["dot2-insts"] = true;
195       Features["dot5-insts"] = true;
196       Features["dot6-insts"] = true;
197       Features["dot7-insts"] = true;
198       Features["dl-insts"] = true;
199       Features["flat-address-space"] = true;
200       Features["16-bit-insts"] = true;
201       Features["dpp"] = true;
202       Features["gfx8-insts"] = true;
203       Features["gfx9-insts"] = true;
204       Features["gfx10-insts"] = true;
205       Features["gfx10-3-insts"] = true;
206       Features["s-memrealtime"] = true;
207       Features["s-memtime-inst"] = true;
208       break;
209     case GK_GFX1012:
210     case GK_GFX1011:
211       Features["dot1-insts"] = true;
212       Features["dot2-insts"] = true;
213       Features["dot5-insts"] = true;
214       Features["dot6-insts"] = true;
215       Features["dot7-insts"] = true;
216       LLVM_FALLTHROUGH;
217     case GK_GFX1013:
218     case GK_GFX1010:
219       Features["dl-insts"] = true;
220       Features["ci-insts"] = true;
221       Features["flat-address-space"] = true;
222       Features["16-bit-insts"] = true;
223       Features["dpp"] = true;
224       Features["gfx8-insts"] = true;
225       Features["gfx9-insts"] = true;
226       Features["gfx10-insts"] = true;
227       Features["s-memrealtime"] = true;
228       Features["s-memtime-inst"] = true;
229       break;
230     case GK_GFX940:
231       Features["gfx940-insts"] = true;
232       LLVM_FALLTHROUGH;
233     case GK_GFX90A:
234       Features["gfx90a-insts"] = true;
235       LLVM_FALLTHROUGH;
236     case GK_GFX908:
237       Features["dot3-insts"] = true;
238       Features["dot4-insts"] = true;
239       Features["dot5-insts"] = true;
240       Features["dot6-insts"] = true;
241       Features["mai-insts"] = true;
242       LLVM_FALLTHROUGH;
243     case GK_GFX906:
244       Features["dl-insts"] = true;
245       Features["dot1-insts"] = true;
246       Features["dot2-insts"] = true;
247       Features["dot7-insts"] = true;
248       LLVM_FALLTHROUGH;
249     case GK_GFX90C:
250     case GK_GFX909:
251     case GK_GFX904:
252     case GK_GFX902:
253     case GK_GFX900:
254       Features["gfx9-insts"] = true;
255       LLVM_FALLTHROUGH;
256     case GK_GFX810:
257     case GK_GFX805:
258     case GK_GFX803:
259     case GK_GFX802:
260     case GK_GFX801:
261       Features["gfx8-insts"] = true;
262       Features["16-bit-insts"] = true;
263       Features["dpp"] = true;
264       Features["s-memrealtime"] = true;
265       LLVM_FALLTHROUGH;
266     case GK_GFX705:
267     case GK_GFX704:
268     case GK_GFX703:
269     case GK_GFX702:
270     case GK_GFX701:
271     case GK_GFX700:
272       Features["ci-insts"] = true;
273       Features["flat-address-space"] = true;
274       LLVM_FALLTHROUGH;
275     case GK_GFX602:
276     case GK_GFX601:
277     case GK_GFX600:
278       Features["s-memtime-inst"] = true;
279       break;
280     case GK_NONE:
281       break;
282     default:
283       llvm_unreachable("Unhandled GPU!");
284     }
285   } else {
286     if (CPU.empty())
287       CPU = "r600";
288 
289     switch (llvm::AMDGPU::parseArchR600(CPU)) {
290     case GK_CAYMAN:
291     case GK_CYPRESS:
292     case GK_RV770:
293     case GK_RV670:
294       // TODO: Add fp64 when implemented.
295       break;
296     case GK_TURKS:
297     case GK_CAICOS:
298     case GK_BARTS:
299     case GK_SUMO:
300     case GK_REDWOOD:
301     case GK_JUNIPER:
302     case GK_CEDAR:
303     case GK_RV730:
304     case GK_RV710:
305     case GK_RS880:
306     case GK_R630:
307     case GK_R600:
308       break;
309     default:
310       llvm_unreachable("Unhandled GPU!");
311     }
312   }
313 
314   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
315 }
316 
317 void AMDGPUTargetInfo::fillValidCPUList(
318     SmallVectorImpl<StringRef> &Values) const {
319   if (isAMDGCN(getTriple()))
320     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
321   else
322     llvm::AMDGPU::fillValidArchListR600(Values);
323 }
324 
325 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
326   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
327 }
328 
329 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
330                                    const TargetOptions &Opts)
331     : TargetInfo(Triple),
332       GPUKind(isAMDGCN(Triple) ?
333               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
334               llvm::AMDGPU::parseArchR600(Opts.CPU)),
335       GPUFeatures(isAMDGCN(Triple) ?
336                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
337                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
338   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
339                                         : DataLayoutStringR600);
340 
341   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
342                      !isAMDGCN(Triple));
343   UseAddrSpaceMapMangling = true;
344 
345   HasLegalHalfType = true;
346   HasFloat16 = true;
347   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
348   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
349 
350   // Set pointer width and alignment for target address space 0.
351   PointerWidth = PointerAlign = getPointerWidthV(Generic);
352   if (getMaxPointerWidth() == 64) {
353     LongWidth = LongAlign = 64;
354     SizeType = UnsignedLong;
355     PtrDiffType = SignedLong;
356     IntPtrType = SignedLong;
357   }
358 
359   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
360 }
361 
362 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
363   TargetInfo::adjust(Diags, Opts);
364   // ToDo: There are still a few places using default address space as private
365   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
366   // can be removed from the following line.
367   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
368                      !isAMDGCN(getTriple()));
369 }
370 
371 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
372   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
373                                              Builtin::FirstTSBuiltin);
374 }
375 
376 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
377                                         MacroBuilder &Builder) const {
378   Builder.defineMacro("__AMD__");
379   Builder.defineMacro("__AMDGPU__");
380 
381   if (isAMDGCN(getTriple()))
382     Builder.defineMacro("__AMDGCN__");
383   else
384     Builder.defineMacro("__R600__");
385 
386   if (GPUKind != llvm::AMDGPU::GK_NONE) {
387     StringRef CanonName = isAMDGCN(getTriple()) ?
388       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
389     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
390     if (isAMDGCN(getTriple())) {
391       Builder.defineMacro("__amdgcn_processor__",
392                           Twine("\"") + Twine(CanonName) + Twine("\""));
393       Builder.defineMacro("__amdgcn_target_id__",
394                           Twine("\"") + Twine(getTargetID().getValue()) +
395                               Twine("\""));
396       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
397         auto Loc = OffloadArchFeatures.find(F);
398         if (Loc != OffloadArchFeatures.end()) {
399           std::string NewF = F.str();
400           std::replace(NewF.begin(), NewF.end(), '-', '_');
401           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
402                                   Twine("__"),
403                               Loc->second ? "1" : "0");
404         }
405       }
406     }
407   }
408 
409   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
410   // removed in the near future.
411   if (hasFMAF())
412     Builder.defineMacro("__HAS_FMAF__");
413   if (hasFastFMAF())
414     Builder.defineMacro("FP_FAST_FMAF");
415   if (hasLDEXPF())
416     Builder.defineMacro("__HAS_LDEXPF__");
417   if (hasFP64())
418     Builder.defineMacro("__HAS_FP64__");
419   if (hasFastFMA())
420     Builder.defineMacro("FP_FAST_FMA");
421 
422   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
423 }
424 
425 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
426   assert(HalfFormat == Aux->HalfFormat);
427   assert(FloatFormat == Aux->FloatFormat);
428   assert(DoubleFormat == Aux->DoubleFormat);
429 
430   // On x86_64 long double is 80-bit extended precision format, which is
431   // not supported by AMDGPU. 128-bit floating point format is also not
432   // supported by AMDGPU. Therefore keep its own format for these two types.
433   auto SaveLongDoubleFormat = LongDoubleFormat;
434   auto SaveFloat128Format = Float128Format;
435   copyAuxTarget(Aux);
436   LongDoubleFormat = SaveLongDoubleFormat;
437   Float128Format = SaveFloat128Format;
438   // For certain builtin types support on the host target, claim they are
439   // support to pass the compilation of the host code during the device-side
440   // compilation.
441   // FIXME: As the side effect, we also accept `__float128` uses in the device
442   // code. To rejct these builtin types supported in the host target but not in
443   // the device target, one approach would support `device_builtin` attribute
444   // so that we could tell the device builtin types from the host ones. The
445   // also solves the different representations of the same builtin type, such
446   // as `size_t` in the MSVC environment.
447   if (Aux->hasFloat128Type()) {
448     HasFloat128 = true;
449     Float128Format = DoubleFormat;
450   }
451 }
452