1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// LLVM data layout string for non-AMDGCN (R600) triples; the constructor hands
// it to resetDataLayout(). In LLVM data layout syntax, "p:32:32" gives the
// default address space 32-bit pointers and "A5" places allocas in address
// space 5 (the constructor asserts this equals Private).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
35 
// LLVM data layout string for AMDGCN triples; the constructor hands it to
// resetDataLayout(). Pointer sizes are given per address space: 64-bit for the
// default space and for p1/p4, 32-bit for p2/p3/p5/p6. "A5" places allocas in
// address space 5 and "ni:7" declares address space 7 non-integral.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";
41 
// Language-to-target address space map in which the default (unqualified)
// address space is Generic. setAddressSpaceMap() installs it when called with
// DefaultIsPrivate == false.
// Entries are positional: the trailing comment on each line names the language
// address space that slot stands for, so the order must not be changed.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
61 
// Language-to-target address space map in which the default (unqualified)
// address space is Private. setAddressSpaceMap() installs it when called with
// DefaultIsPrivate == true.
// Entries are positional: the trailing comment on each line names the language
// address space that slot stands for, so the order must not be changed.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // The SYCL address space values in this map are dummy placeholders.
    Generic,  // sycl_global
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
83 } // namespace targets
84 } // namespace clang
85 
// Table of AMDGPU builtins, produced by expanding every BUILTIN and
// TARGET_BUILTIN entry of BuiltinsAMDGPU.def into a Builtin::Info initializer.
// The two macros differ only in the last initializer: nullptr for BUILTIN,
// FEATURE for TARGET_BUILTIN (presumably the target feature string the builtin
// requires -- confirm against Builtin::Info).
// getTargetBuiltins() exposes this table.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
93 
// Register name table exposed through getGCCRegNames(). It lists, in order:
//   - v0 .. v255
//   - s0 .. s127
//   - the named registers exec, vcc, scc, m0 and flat_scratch, followed by the
//     _lo/_hi halves of exec, vcc and flat_scratch
//   - a0 .. a255
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
170 
171 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
172   return llvm::makeArrayRef(GCCRegNames);
173 }
174 
// Seeds Features with the default feature set implied by the GPU named in CPU,
// then forwards all arguments to TargetInfo::initFeatureMap() and returns its
// result.
//
// For AMDGCN triples the GPU kind is parsed with parseArchAMDGCN(); for all
// other triples an empty CPU is replaced by "r600" and parsed with
// parseArchR600(). A GPU kind not listed in the relevant switch hits
// llvm_unreachable.
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;

  // XXX - What does the member GPUKind mean if a device name string is passed
  // here?
  if (isAMDGCN(getTriple())) {
    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    // Self-contained feature list (ends in break, nothing is inherited from
    // other cases); the only group that sets gfx10-3-insts.
    case GK_GFX1033:
    case GK_GFX1032:
    case GK_GFX1031:
    case GK_GFX1030:
      Features["ci-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dl-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    // GFX1011/GFX1012 add the dot instructions and then fall through to pick
    // up the whole GFX1010 set.
    case GK_GFX1012:
    case GK_GFX1011:
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX1010:
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    // From here down to the break after GK_GFX600 the cases form a single
    // fallthrough chain: each case sets only what it adds and then inherits
    // every feature set by the cases below it. Reordering the cases changes
    // the resulting feature sets.
    case GK_GFX90A:
      Features["gfx90a-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX90C:
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX805:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX705:
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX602:
    case GK_GFX601:
    case GK_GFX600:
      Features["s-memtime-inst"] = true;
      break;
    // No GPU-specific defaults are added for GK_NONE (presumably what
    // parseArchAMDGCN() yields for an empty or unknown CPU name).
    case GK_NONE:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    // R600 path: fall back to the generic "r600" CPU when none was given. The
    // updated name is also what gets forwarded to TargetInfo::initFeatureMap().
    if (CPU.empty())
      CPU = "r600";

    // No features are set for any R600 GPU at the moment; the switch only
    // checks that the parsed kind is one of the known ones.
    // NOTE(review): unlike the AMDGCN switch there is no GK_NONE case here, so
    // a CPU name that parses to GK_NONE reaches llvm_unreachable -- confirm
    // that callers always validate the CPU name first.
    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}
308 
309 void AMDGPUTargetInfo::fillValidCPUList(
310     SmallVectorImpl<StringRef> &Values) const {
311   if (isAMDGCN(getTriple()))
312     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
313   else
314     llvm::AMDGPU::fillValidArchListR600(Values);
315 }
316 
317 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
318   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
319 }
320 
321 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
322                                    const TargetOptions &Opts)
323     : TargetInfo(Triple),
324       GPUKind(isAMDGCN(Triple) ?
325               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
326               llvm::AMDGPU::parseArchR600(Opts.CPU)),
327       GPUFeatures(isAMDGCN(Triple) ?
328                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
329                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
330   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
331                                         : DataLayoutStringR600);
332   assert(DataLayout->getAllocaAddrSpace() == Private);
333   GridValues = llvm::omp::AMDGPUGpuGridValues;
334 
335   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
336                      !isAMDGCN(Triple));
337   UseAddrSpaceMapMangling = true;
338 
339   HasLegalHalfType = true;
340   HasFloat16 = true;
341   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
342   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
343 
344   // Set pointer width and alignment for target address space 0.
345   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
346   if (getMaxPointerWidth() == 64) {
347     LongWidth = LongAlign = 64;
348     SizeType = UnsignedLong;
349     PtrDiffType = SignedLong;
350     IntPtrType = SignedLong;
351   }
352 
353   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
354 }
355 
356 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
357   TargetInfo::adjust(Opts);
358   // ToDo: There are still a few places using default address space as private
359   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
360   // can be removed from the following line.
361   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
362                      !isAMDGCN(getTriple()));
363 }
364 
365 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
366   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
367                                              Builtin::FirstTSBuiltin);
368 }
369 
370 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
371                                         MacroBuilder &Builder) const {
372   Builder.defineMacro("__AMD__");
373   Builder.defineMacro("__AMDGPU__");
374 
375   if (isAMDGCN(getTriple()))
376     Builder.defineMacro("__AMDGCN__");
377   else
378     Builder.defineMacro("__R600__");
379 
380   if (GPUKind != llvm::AMDGPU::GK_NONE) {
381     StringRef CanonName = isAMDGCN(getTriple()) ?
382       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
383     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
384     if (isAMDGCN(getTriple())) {
385       Builder.defineMacro("__amdgcn_processor__",
386                           Twine("\"") + Twine(CanonName) + Twine("\""));
387       Builder.defineMacro("__amdgcn_target_id__",
388                           Twine("\"") + Twine(getTargetID().getValue()) +
389                               Twine("\""));
390       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
391         auto Loc = OffloadArchFeatures.find(F);
392         if (Loc != OffloadArchFeatures.end()) {
393           std::string NewF = F.str();
394           std::replace(NewF.begin(), NewF.end(), '-', '_');
395           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
396                                   Twine("__"),
397                               Loc->second ? "1" : "0");
398         }
399       }
400     }
401   }
402 
403   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
404   // removed in the near future.
405   if (hasFMAF())
406     Builder.defineMacro("__HAS_FMAF__");
407   if (hasFastFMAF())
408     Builder.defineMacro("FP_FAST_FMAF");
409   if (hasLDEXPF())
410     Builder.defineMacro("__HAS_LDEXPF__");
411   if (hasFP64())
412     Builder.defineMacro("__HAS_FP64__");
413   if (hasFastFMA())
414     Builder.defineMacro("FP_FAST_FMA");
415 
416   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
417 }
418 
419 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
420   assert(HalfFormat == Aux->HalfFormat);
421   assert(FloatFormat == Aux->FloatFormat);
422   assert(DoubleFormat == Aux->DoubleFormat);
423 
424   // On x86_64 long double is 80-bit extended precision format, which is
425   // not supported by AMDGPU. 128-bit floating point format is also not
426   // supported by AMDGPU. Therefore keep its own format for these two types.
427   auto SaveLongDoubleFormat = LongDoubleFormat;
428   auto SaveFloat128Format = Float128Format;
429   copyAuxTarget(Aux);
430   LongDoubleFormat = SaveLongDoubleFormat;
431   Float128Format = SaveFloat128Format;
432   // For certain builtin types support on the host target, claim they are
433   // support to pass the compilation of the host code during the device-side
434   // compilation.
435   // FIXME: As the side effect, we also accept `__float128` uses in the device
436   // code. To rejct these builtin types supported in the host target but not in
437   // the device target, one approach would support `device_builtin` attribute
438   // so that we could tell the device builtin types from the host ones. The
439   // also solves the different representations of the same builtin type, such
440   // as `size_t` in the MSVC environment.
441   if (Aux->hasFloat128Type()) {
442     HasFloat128 = true;
443     Float128Format = DoubleFormat;
444   }
445 }
446