1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// Data layout description used for triples that are not AMDGCN (R600).
// The literal is split by field group purely for readability; adjacent
// pieces concatenate into one string. See the "Data Layout" section of the
// LLVM Language Reference for the field syntax.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
34 
// Data layout description used for AMDGCN triples. The literal is split by
// field group purely for readability; adjacent pieces concatenate into one
// string. See the "Data Layout" section of the LLVM Language Reference for
// the field syntax.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
40 
// Address-space table in which the language-level "Default" address space
// maps to Generic. Entries are positional: the trailing comment on each line
// names the language address space that slot corresponds to, so the order
// must not be changed. Installed by setAddressSpaceMap() when it is called
// with DefaultIsPrivate == false.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
60 
// Address-space table in which the language-level "Default" address space
// maps to Private. Entries are positional: the trailing comment on each line
// names the language address space that slot corresponds to, so the order
// must not be changed. Installed by setAddressSpaceMap() when it is called
// with DefaultIsPrivate == true.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // SYCL address space values for this map are dummy placeholders; all of
    // them are set to Generic.
    Generic,  // sycl_global
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
82 } // namespace targets
83 } // namespace clang
84 
// Table of AMDGPU builtins, generated by expanding BuiltinsAMDGPU.def.
// Every BUILTIN(...) entry in that file becomes one initializer with a null
// last field; every TARGET_BUILTIN(...) entry additionally carries its
// FEATURE argument in the last field. The table is exposed through
// getTargetBuiltins().
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
92 
// Register names returned by getGCCRegNames(). The table is laid out as four
// consecutive groups:
//   - "v0" .. "v255"
//   - "s0" .. "s127"
//   - named registers: exec, vcc, scc, m0, flat_scratch and the _lo/_hi
//     halves of exec, vcc and flat_scratch
//   - "a0" .. "a255"
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
169 
170 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
171   return llvm::makeArrayRef(GCCRegNames);
172 }
173 
// Seeds Features with the feature flags that are on by default for the GPU
// named by CPU, then forwards to TargetInfo::initFeatureMap() and returns its
// result.
//
// For AMDGCN triples the name is parsed with parseArchAMDGCN() and each known
// GPU kind enables a fixed set of feature strings. For all other triples an
// empty CPU is replaced by "r600", the name is parsed with parseArchR600(),
// and no feature is enabled. A parse result that no case handles reaches
// llvm_unreachable().
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;

  // XXX - What does the member GPU mean if device name string passed here?
  if (isAMDGCN(getTriple())) {
    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    // This group spells out its complete feature list and does not fall
    // through into any other case.
    case GK_GFX1033:
    case GK_GFX1032:
    case GK_GFX1031:
    case GK_GFX1030:
      Features["ci-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dl-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    // GFX1011/GFX1012 add the dot-instruction features and then share the
    // GFX1010 list below.
    case GK_GFX1012:
    case GK_GFX1011:
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX1010:
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    // From here down to the GFX600 group every case falls through into the
    // next one, so a GPU kind receives its own features plus those of every
    // case listed after it, up to the break following GFX600. The order of
    // these cases is therefore significant.
    case GK_GFX90A:
      Features["gfx90a-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX90C:
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX805:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX705:
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX602:
    case GK_GFX601:
    case GK_GFX600:
      Features["s-memtime-inst"] = true;
      break;
    // GK_NONE enables no CPU-specific defaults.
    case GK_NONE:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    // Non-AMDGCN triples fall back to "r600" when no CPU name was given; the
    // substituted name is also what gets passed to the base class below.
    if (CPU.empty())
      CPU = "r600";

    // No feature is enabled for any of the listed kinds; the explicit case
    // lists only separate handled kinds from the unreachable default.
    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}
307 
308 void AMDGPUTargetInfo::fillValidCPUList(
309     SmallVectorImpl<StringRef> &Values) const {
310   if (isAMDGCN(getTriple()))
311     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
312   else
313     llvm::AMDGPU::fillValidArchListR600(Values);
314 }
315 
316 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
317   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
318 }
319 
320 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
321                                    const TargetOptions &Opts)
322     : TargetInfo(Triple),
323       GPUKind(isAMDGCN(Triple) ?
324               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
325               llvm::AMDGPU::parseArchR600(Opts.CPU)),
326       GPUFeatures(isAMDGCN(Triple) ?
327                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
328                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
329   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
330                                         : DataLayoutStringR600);
331   GridValues = llvm::omp::AMDGPUGpuGridValues;
332 
333   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
334                      !isAMDGCN(Triple));
335   UseAddrSpaceMapMangling = true;
336 
337   HasLegalHalfType = true;
338   HasFloat16 = true;
339   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
340   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
341 
342   // Set pointer width and alignment for target address space 0.
343   PointerWidth = PointerAlign = getPointerWidthV(Generic);
344   if (getMaxPointerWidth() == 64) {
345     LongWidth = LongAlign = 64;
346     SizeType = UnsignedLong;
347     PtrDiffType = SignedLong;
348     IntPtrType = SignedLong;
349   }
350 
351   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
352 }
353 
354 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
355   TargetInfo::adjust(Opts);
356   // ToDo: There are still a few places using default address space as private
357   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
358   // can be removed from the following line.
359   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
360                      !isAMDGCN(getTriple()));
361 }
362 
363 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
364   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
365                                              Builtin::FirstTSBuiltin);
366 }
367 
368 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
369                                         MacroBuilder &Builder) const {
370   Builder.defineMacro("__AMD__");
371   Builder.defineMacro("__AMDGPU__");
372 
373   if (isAMDGCN(getTriple()))
374     Builder.defineMacro("__AMDGCN__");
375   else
376     Builder.defineMacro("__R600__");
377 
378   if (GPUKind != llvm::AMDGPU::GK_NONE) {
379     StringRef CanonName = isAMDGCN(getTriple()) ?
380       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
381     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
382     if (isAMDGCN(getTriple())) {
383       Builder.defineMacro("__amdgcn_processor__",
384                           Twine("\"") + Twine(CanonName) + Twine("\""));
385       Builder.defineMacro("__amdgcn_target_id__",
386                           Twine("\"") + Twine(getTargetID().getValue()) +
387                               Twine("\""));
388       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
389         auto Loc = OffloadArchFeatures.find(F);
390         if (Loc != OffloadArchFeatures.end()) {
391           std::string NewF = F.str();
392           std::replace(NewF.begin(), NewF.end(), '-', '_');
393           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
394                                   Twine("__"),
395                               Loc->second ? "1" : "0");
396         }
397       }
398     }
399   }
400 
401   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
402   // removed in the near future.
403   if (hasFMAF())
404     Builder.defineMacro("__HAS_FMAF__");
405   if (hasFastFMAF())
406     Builder.defineMacro("FP_FAST_FMAF");
407   if (hasLDEXPF())
408     Builder.defineMacro("__HAS_LDEXPF__");
409   if (hasFP64())
410     Builder.defineMacro("__HAS_FP64__");
411   if (hasFastFMA())
412     Builder.defineMacro("FP_FAST_FMA");
413 
414   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
415 }
416 
417 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
418   assert(HalfFormat == Aux->HalfFormat);
419   assert(FloatFormat == Aux->FloatFormat);
420   assert(DoubleFormat == Aux->DoubleFormat);
421 
422   // On x86_64 long double is 80-bit extended precision format, which is
423   // not supported by AMDGPU. 128-bit floating point format is also not
424   // supported by AMDGPU. Therefore keep its own format for these two types.
425   auto SaveLongDoubleFormat = LongDoubleFormat;
426   auto SaveFloat128Format = Float128Format;
427   copyAuxTarget(Aux);
428   LongDoubleFormat = SaveLongDoubleFormat;
429   Float128Format = SaveFloat128Format;
430   // For certain builtin types support on the host target, claim they are
431   // support to pass the compilation of the host code during the device-side
432   // compilation.
433   // FIXME: As the side effect, we also accept `__float128` uses in the device
434   // code. To rejct these builtin types supported in the host target but not in
435   // the device target, one approach would support `device_builtin` attribute
436   // so that we could tell the device builtin types from the host ones. The
437   // also solves the different representations of the same builtin type, such
438   // as `size_t` in the MSVC environment.
439   if (Aux->hasFloat128Type()) {
440     HasFloat128 = true;
441     Float128Format = DoubleFormat;
442   }
443 }
444