1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// Data layout description used for non-amdgcn (R600) triples. The literal is
// split by entry kind purely for readability; adjacent pieces are concatenated
// by the compiler into one string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                     // e and p entries
    "-i64:64"                                       // i entry
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // v entries
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // v entries (continued)
    "-n32:64-S32-A5-G1";                            // n, S, A and G entries
34 
// Data layout description used for amdgcn triples. The literal is split by
// entry kind purely for readability; adjacent pieces are concatenated by the
// compiler into one string.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                               // e, default p
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // p1..p6 entries
    "-i64:64"                                                 // i entry
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"           // v entries
    "-v256:256-v512:512-v1024:1024-v2048:2048"                // v (continued)
    "-n32:64-S32-A5-G1"                                       // n, S, A, G
    "-ni:7";                                                  // ni entry
40 
// Address space table installed by setAddressSpaceMap() when the default
// address space is not private: the "Default" slot resolves to Generic. The
// trailing comment on each entry names the language address space that the
// slot stands for.
// NOTE(review): the slot order presumably has to mirror the LangAS
// enumeration -- confirm against clang/Basic/AddressSpaces.h before adding or
// reordering entries.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Global,   // sycl_global_device
    Global,   // sycl_global_host
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
62 
// Address space table installed by setAddressSpaceMap() when the default
// address space is private: the "Default" slot resolves to Private. The
// trailing comment on each entry names the language address space that the
// slot stands for.
// NOTE(review): the slot order presumably has to mirror the LangAS
// enumeration -- confirm against clang/Basic/AddressSpaces.h before adding or
// reordering entries.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // SYCL address space values for this map are dummy
    Generic,  // sycl_global
    Generic,  // sycl_global_device
    Generic,  // sycl_global_host
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
86 } // namespace targets
87 } // namespace clang
88 
// Table of AMDGPU builtin descriptions, generated by expanding every BUILTIN /
// TARGET_BUILTIN line of BuiltinsAMDGPU.def into one initializer. Both forms
// store the stringized ID, TYPE and ATTRS and tag the entry ALL_LANGUAGES;
// TARGET_BUILTIN additionally records its FEATURE argument in the last field,
// which plain BUILTIN entries leave as nullptr.
// NOTE(review): BUILTIN and TARGET_BUILTIN are not #undef'd in this file;
// presumably the .def file does that itself -- confirm.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
96 
// Register name table handed out by getGCCRegNames(). Layout, in order:
//   - v0 .. v255
//   - s0 .. s127
//   - the named registers exec, vcc, scc, m0 and flat_scratch, followed by the
//     _lo/_hi halves of exec, vcc and flat_scratch
//   - a0 .. a255
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
173 
174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175   return llvm::makeArrayRef(GCCRegNames);
176 }
177 
178 bool AMDGPUTargetInfo::initFeatureMap(
179     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180     const std::vector<std::string> &FeatureVec) const {
181 
182   using namespace llvm::AMDGPU;
183 
184   // XXX - What does the member GPU mean if device name string passed here?
185   if (isAMDGCN(getTriple())) {
186     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
187     case GK_GFX1034:
188     case GK_GFX1033:
189     case GK_GFX1032:
190     case GK_GFX1031:
191     case GK_GFX1030:
192       Features["ci-insts"] = true;
193       Features["dot1-insts"] = true;
194       Features["dot2-insts"] = true;
195       Features["dot5-insts"] = true;
196       Features["dot6-insts"] = true;
197       Features["dot7-insts"] = true;
198       Features["dl-insts"] = true;
199       Features["flat-address-space"] = true;
200       Features["16-bit-insts"] = true;
201       Features["dpp"] = true;
202       Features["gfx8-insts"] = true;
203       Features["gfx9-insts"] = true;
204       Features["gfx10-insts"] = true;
205       Features["gfx10-3-insts"] = true;
206       Features["s-memrealtime"] = true;
207       Features["s-memtime-inst"] = true;
208       break;
209     case GK_GFX1012:
210     case GK_GFX1011:
211       Features["dot1-insts"] = true;
212       Features["dot2-insts"] = true;
213       Features["dot5-insts"] = true;
214       Features["dot6-insts"] = true;
215       Features["dot7-insts"] = true;
216       LLVM_FALLTHROUGH;
217     case GK_GFX1013:
218     case GK_GFX1010:
219       Features["dl-insts"] = true;
220       Features["ci-insts"] = true;
221       Features["flat-address-space"] = true;
222       Features["16-bit-insts"] = true;
223       Features["dpp"] = true;
224       Features["gfx8-insts"] = true;
225       Features["gfx9-insts"] = true;
226       Features["gfx10-insts"] = true;
227       Features["s-memrealtime"] = true;
228       Features["s-memtime-inst"] = true;
229       break;
230     case GK_GFX90A:
231       Features["gfx90a-insts"] = true;
232       LLVM_FALLTHROUGH;
233     case GK_GFX908:
234       Features["dot3-insts"] = true;
235       Features["dot4-insts"] = true;
236       Features["dot5-insts"] = true;
237       Features["dot6-insts"] = true;
238       Features["mai-insts"] = true;
239       LLVM_FALLTHROUGH;
240     case GK_GFX906:
241       Features["dl-insts"] = true;
242       Features["dot1-insts"] = true;
243       Features["dot2-insts"] = true;
244       Features["dot7-insts"] = true;
245       LLVM_FALLTHROUGH;
246     case GK_GFX90C:
247     case GK_GFX909:
248     case GK_GFX904:
249     case GK_GFX902:
250     case GK_GFX900:
251       Features["gfx9-insts"] = true;
252       LLVM_FALLTHROUGH;
253     case GK_GFX810:
254     case GK_GFX805:
255     case GK_GFX803:
256     case GK_GFX802:
257     case GK_GFX801:
258       Features["gfx8-insts"] = true;
259       Features["16-bit-insts"] = true;
260       Features["dpp"] = true;
261       Features["s-memrealtime"] = true;
262       LLVM_FALLTHROUGH;
263     case GK_GFX705:
264     case GK_GFX704:
265     case GK_GFX703:
266     case GK_GFX702:
267     case GK_GFX701:
268     case GK_GFX700:
269       Features["ci-insts"] = true;
270       Features["flat-address-space"] = true;
271       LLVM_FALLTHROUGH;
272     case GK_GFX602:
273     case GK_GFX601:
274     case GK_GFX600:
275       Features["s-memtime-inst"] = true;
276       break;
277     case GK_NONE:
278       break;
279     default:
280       llvm_unreachable("Unhandled GPU!");
281     }
282   } else {
283     if (CPU.empty())
284       CPU = "r600";
285 
286     switch (llvm::AMDGPU::parseArchR600(CPU)) {
287     case GK_CAYMAN:
288     case GK_CYPRESS:
289     case GK_RV770:
290     case GK_RV670:
291       // TODO: Add fp64 when implemented.
292       break;
293     case GK_TURKS:
294     case GK_CAICOS:
295     case GK_BARTS:
296     case GK_SUMO:
297     case GK_REDWOOD:
298     case GK_JUNIPER:
299     case GK_CEDAR:
300     case GK_RV730:
301     case GK_RV710:
302     case GK_RS880:
303     case GK_R630:
304     case GK_R600:
305       break;
306     default:
307       llvm_unreachable("Unhandled GPU!");
308     }
309   }
310 
311   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
312 }
313 
314 void AMDGPUTargetInfo::fillValidCPUList(
315     SmallVectorImpl<StringRef> &Values) const {
316   if (isAMDGCN(getTriple()))
317     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
318   else
319     llvm::AMDGPU::fillValidArchListR600(Values);
320 }
321 
322 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
323   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
324 }
325 
326 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
327                                    const TargetOptions &Opts)
328     : TargetInfo(Triple),
329       GPUKind(isAMDGCN(Triple) ?
330               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
331               llvm::AMDGPU::parseArchR600(Opts.CPU)),
332       GPUFeatures(isAMDGCN(Triple) ?
333                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
334                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
335   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
336                                         : DataLayoutStringR600);
337   GridValues = llvm::omp::AMDGPUGpuGridValues;
338 
339   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
340                      !isAMDGCN(Triple));
341   UseAddrSpaceMapMangling = true;
342 
343   HasLegalHalfType = true;
344   HasFloat16 = true;
345   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
346   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
347 
348   // Set pointer width and alignment for target address space 0.
349   PointerWidth = PointerAlign = getPointerWidthV(Generic);
350   if (getMaxPointerWidth() == 64) {
351     LongWidth = LongAlign = 64;
352     SizeType = UnsignedLong;
353     PtrDiffType = SignedLong;
354     IntPtrType = SignedLong;
355   }
356 
357   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
358 }
359 
360 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
361   TargetInfo::adjust(Opts);
362   // ToDo: There are still a few places using default address space as private
363   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
364   // can be removed from the following line.
365   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
366                      !isAMDGCN(getTriple()));
367 }
368 
369 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
370   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
371                                              Builtin::FirstTSBuiltin);
372 }
373 
374 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
375                                         MacroBuilder &Builder) const {
376   Builder.defineMacro("__AMD__");
377   Builder.defineMacro("__AMDGPU__");
378 
379   if (isAMDGCN(getTriple()))
380     Builder.defineMacro("__AMDGCN__");
381   else
382     Builder.defineMacro("__R600__");
383 
384   if (GPUKind != llvm::AMDGPU::GK_NONE) {
385     StringRef CanonName = isAMDGCN(getTriple()) ?
386       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
387     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
388     if (isAMDGCN(getTriple())) {
389       Builder.defineMacro("__amdgcn_processor__",
390                           Twine("\"") + Twine(CanonName) + Twine("\""));
391       Builder.defineMacro("__amdgcn_target_id__",
392                           Twine("\"") + Twine(getTargetID().getValue()) +
393                               Twine("\""));
394       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
395         auto Loc = OffloadArchFeatures.find(F);
396         if (Loc != OffloadArchFeatures.end()) {
397           std::string NewF = F.str();
398           std::replace(NewF.begin(), NewF.end(), '-', '_');
399           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
400                                   Twine("__"),
401                               Loc->second ? "1" : "0");
402         }
403       }
404     }
405   }
406 
407   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
408   // removed in the near future.
409   if (hasFMAF())
410     Builder.defineMacro("__HAS_FMAF__");
411   if (hasFastFMAF())
412     Builder.defineMacro("FP_FAST_FMAF");
413   if (hasLDEXPF())
414     Builder.defineMacro("__HAS_LDEXPF__");
415   if (hasFP64())
416     Builder.defineMacro("__HAS_FP64__");
417   if (hasFastFMA())
418     Builder.defineMacro("FP_FAST_FMA");
419 
420   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
421 }
422 
423 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
424   assert(HalfFormat == Aux->HalfFormat);
425   assert(FloatFormat == Aux->FloatFormat);
426   assert(DoubleFormat == Aux->DoubleFormat);
427 
428   // On x86_64 long double is 80-bit extended precision format, which is
429   // not supported by AMDGPU. 128-bit floating point format is also not
430   // supported by AMDGPU. Therefore keep its own format for these two types.
431   auto SaveLongDoubleFormat = LongDoubleFormat;
432   auto SaveFloat128Format = Float128Format;
433   copyAuxTarget(Aux);
434   LongDoubleFormat = SaveLongDoubleFormat;
435   Float128Format = SaveFloat128Format;
436   // For certain builtin types support on the host target, claim they are
437   // support to pass the compilation of the host code during the device-side
438   // compilation.
439   // FIXME: As the side effect, we also accept `__float128` uses in the device
440   // code. To rejct these builtin types supported in the host target but not in
441   // the device target, one approach would support `device_builtin` attribute
442   // so that we could tell the device builtin types from the host ones. The
443   // also solves the different representations of the same builtin type, such
444   // as `size_t` in the MSVC environment.
445   if (Aux->hasFloat128Type()) {
446     HasFloat128 = true;
447     Float128Format = DoubleFormat;
448   }
449 }
450