1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
32 static const char *const DataLayoutStringR600 =
33     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
35 
// LLVM data layout description used for AMDGCN triples. Using the LLVM
// LangRef data layout syntax, the components are:
//   e                  - little endian
//   p, p1 .. p6        - pointer size/alignment of the default address space
//                        and of address spaces 1 through 6
//   i64:64, v<N>:<A>   - ABI alignments of i64 and of the vector types
//   n32:64, S32        - native integer widths and stack alignment
//   A5, G1             - alloca and default-globals address spaces
//   ni:7               - address space 7 holds non-integral pointers
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dl-insts"] = true;
187       Features["flat-address-space"] = true;
188       Features["16-bit-insts"] = true;
189       Features["dpp"] = true;
190       Features["gfx8-insts"] = true;
191       Features["gfx9-insts"] = true;
192       Features["gfx10-insts"] = true;
193       Features["gfx10-3-insts"] = true;
194       Features["s-memrealtime"] = true;
195       break;
196     case GK_GFX1012:
197     case GK_GFX1011:
198       Features["dot1-insts"] = true;
199       Features["dot2-insts"] = true;
200       Features["dot5-insts"] = true;
201       Features["dot6-insts"] = true;
202       LLVM_FALLTHROUGH;
203     case GK_GFX1010:
204       Features["dl-insts"] = true;
205       Features["ci-insts"] = true;
206       Features["flat-address-space"] = true;
207       Features["16-bit-insts"] = true;
208       Features["dpp"] = true;
209       Features["gfx8-insts"] = true;
210       Features["gfx9-insts"] = true;
211       Features["gfx10-insts"] = true;
212       Features["s-memrealtime"] = true;
213       Features["s-memtime-inst"] = true;
214       break;
215     case GK_GFX908:
216       Features["dot3-insts"] = true;
217       Features["dot4-insts"] = true;
218       Features["dot5-insts"] = true;
219       Features["dot6-insts"] = true;
220       Features["mai-insts"] = true;
221       LLVM_FALLTHROUGH;
222     case GK_GFX906:
223       Features["dl-insts"] = true;
224       Features["dot1-insts"] = true;
225       Features["dot2-insts"] = true;
226       LLVM_FALLTHROUGH;
227     case GK_GFX90C:
228     case GK_GFX909:
229     case GK_GFX904:
230     case GK_GFX902:
231     case GK_GFX900:
232       Features["gfx9-insts"] = true;
233       LLVM_FALLTHROUGH;
234     case GK_GFX810:
235     case GK_GFX805:
236     case GK_GFX803:
237     case GK_GFX802:
238     case GK_GFX801:
239       Features["gfx8-insts"] = true;
240       Features["16-bit-insts"] = true;
241       Features["dpp"] = true;
242       Features["s-memrealtime"] = true;
243       LLVM_FALLTHROUGH;
244     case GK_GFX705:
245     case GK_GFX704:
246     case GK_GFX703:
247     case GK_GFX702:
248     case GK_GFX701:
249     case GK_GFX700:
250       Features["ci-insts"] = true;
251       Features["flat-address-space"] = true;
252       LLVM_FALLTHROUGH;
253     case GK_GFX602:
254     case GK_GFX601:
255     case GK_GFX600:
256       Features["s-memtime-inst"] = true;
257       break;
258     case GK_NONE:
259       break;
260     default:
261       llvm_unreachable("Unhandled GPU!");
262     }
263   } else {
264     if (CPU.empty())
265       CPU = "r600";
266 
267     switch (llvm::AMDGPU::parseArchR600(CPU)) {
268     case GK_CAYMAN:
269     case GK_CYPRESS:
270     case GK_RV770:
271     case GK_RV670:
272       // TODO: Add fp64 when implemented.
273       break;
274     case GK_TURKS:
275     case GK_CAICOS:
276     case GK_BARTS:
277     case GK_SUMO:
278     case GK_REDWOOD:
279     case GK_JUNIPER:
280     case GK_CEDAR:
281     case GK_RV730:
282     case GK_RV710:
283     case GK_RS880:
284     case GK_R630:
285     case GK_R600:
286       break;
287     default:
288       llvm_unreachable("Unhandled GPU!");
289     }
290   }
291 
292   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
293 }
294 
295 void AMDGPUTargetInfo::fillValidCPUList(
296     SmallVectorImpl<StringRef> &Values) const {
297   if (isAMDGCN(getTriple()))
298     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
299   else
300     llvm::AMDGPU::fillValidArchListR600(Values);
301 }
302 
303 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
304   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
305 }
306 
307 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
308                                    const TargetOptions &Opts)
309     : TargetInfo(Triple),
310       GPUKind(isAMDGCN(Triple) ?
311               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
312               llvm::AMDGPU::parseArchR600(Opts.CPU)),
313       GPUFeatures(isAMDGCN(Triple) ?
314                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
315                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
316   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
317                                         : DataLayoutStringR600);
318   assert(DataLayout->getAllocaAddrSpace() == Private);
319   GridValues = llvm::omp::AMDGPUGpuGridValues;
320 
321   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
322                      !isAMDGCN(Triple));
323   UseAddrSpaceMapMangling = true;
324 
325   HasLegalHalfType = true;
326   HasFloat16 = true;
327   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
328   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
329 
330   // Set pointer width and alignment for target address space 0.
331   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
332   if (getMaxPointerWidth() == 64) {
333     LongWidth = LongAlign = 64;
334     SizeType = UnsignedLong;
335     PtrDiffType = SignedLong;
336     IntPtrType = SignedLong;
337   }
338 
339   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
340 }
341 
342 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
343   TargetInfo::adjust(Opts);
344   // ToDo: There are still a few places using default address space as private
345   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
346   // can be removed from the following line.
347   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
348                      !isAMDGCN(getTriple()));
349 }
350 
351 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
352   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
353                                              Builtin::FirstTSBuiltin);
354 }
355 
356 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
357                                         MacroBuilder &Builder) const {
358   Builder.defineMacro("__AMD__");
359   Builder.defineMacro("__AMDGPU__");
360 
361   if (isAMDGCN(getTriple()))
362     Builder.defineMacro("__AMDGCN__");
363   else
364     Builder.defineMacro("__R600__");
365 
366   if (GPUKind != llvm::AMDGPU::GK_NONE) {
367     StringRef CanonName = isAMDGCN(getTriple()) ?
368       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
369     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
370     if (isAMDGCN(getTriple())) {
371       Builder.defineMacro("__amdgcn_processor__",
372                           Twine("\"") + Twine(CanonName) + Twine("\""));
373       Builder.defineMacro("__amdgcn_target_id__",
374                           Twine("\"") + Twine(getTargetID().getValue()) +
375                               Twine("\""));
376       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
377         auto Loc = OffloadArchFeatures.find(F);
378         if (Loc != OffloadArchFeatures.end()) {
379           std::string NewF = F.str();
380           std::replace(NewF.begin(), NewF.end(), '-', '_');
381           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
382                                   Twine("__"),
383                               Loc->second ? "1" : "0");
384         }
385       }
386     }
387   }
388 
389   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
390   // removed in the near future.
391   if (hasFMAF())
392     Builder.defineMacro("__HAS_FMAF__");
393   if (hasFastFMAF())
394     Builder.defineMacro("FP_FAST_FMAF");
395   if (hasLDEXPF())
396     Builder.defineMacro("__HAS_LDEXPF__");
397   if (hasFP64())
398     Builder.defineMacro("__HAS_FP64__");
399   if (hasFastFMA())
400     Builder.defineMacro("FP_FAST_FMA");
401 
402   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
403 }
404 
405 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
406   assert(HalfFormat == Aux->HalfFormat);
407   assert(FloatFormat == Aux->FloatFormat);
408   assert(DoubleFormat == Aux->DoubleFormat);
409 
410   // On x86_64 long double is 80-bit extended precision format, which is
411   // not supported by AMDGPU. 128-bit floating point format is also not
412   // supported by AMDGPU. Therefore keep its own format for these two types.
413   auto SaveLongDoubleFormat = LongDoubleFormat;
414   auto SaveFloat128Format = Float128Format;
415   copyAuxTarget(Aux);
416   LongDoubleFormat = SaveLongDoubleFormat;
417   Float128Format = SaveFloat128Format;
418   // For certain builtin types support on the host target, claim they are
419   // support to pass the compilation of the host code during the device-side
420   // compilation.
421   // FIXME: As the side effect, we also accept `__float128` uses in the device
422   // code. To rejct these builtin types supported in the host target but not in
423   // the device target, one approach would support `device_builtin` attribute
424   // so that we could tell the device builtin types from the host ones. The
425   // also solves the different representations of the same builtin type, such
426   // as `size_t` in the MSVC environment.
427   if (Aux->hasFloat128Type()) {
428     HasFloat128 = true;
429     Float128Format = DoubleFormat;
430   }
431 }
432